__all__ = ("RawIngestTask", "RawIngestConfig", "makeTransferChoiceField")
import itertools
import os.path

from dataclasses import dataclass
from typing import List, Dict, Iterator, Iterable, Type, Optional, Any, Mapping
from collections import defaultdict
from multiprocessing import Pool
from astro_metadata_translator import ObservationInfo, fix_header, merge_headers
from lsst.afw.fits import readMetadata
from lsst.daf.butler import (
    Butler,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionRecord,
    FileDataset,
)
from lsst.geom import Box2D
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
from lsst.sphgeom import ConvexPolygon
from lsst.utils import doImport

from .fitsRawFormatterBase import FitsRawFormatterBase
54 """Structure that holds information about a single raw file, used during 58 dataId: DataCoordinate
59 """Data ID for this file (`lsst.daf.butler.DataCoordinate`). 61 This may be a minimal `~lsst.daf.butler.DataCoordinate` base instance, or 62 a complete `~lsst.daf.butler.ExpandedDataCoordinate`. 65 obsInfo: ObservationInfo
66 """Standardized observation metadata extracted directly from the file 67 headers (`astro_metadata_translator.ObservationInfo`). 71 """Region on the sky covered by this file, possibly with padding 72 (`lsst.sphgeom.ConvexPolygon`). 76 """Name of the file this information was extracted from (`str`). 78 This is the path prior to ingest, not the path after ingest. 81 FormatterClass: Type[FitsRawFormatterBase]
82 """Formatter class that should be used to ingest this file and compute 83 a spatial region for it (`type`; as subclass of `FitsRawFormatterBase`). 89 """Structure that holds information about a complete raw exposure, used 93 dataId: DataCoordinate
94 """Data ID for this exposure (`lsst.daf.butler.DataCoordinate`). 96 This may be a minimal `~lsst.daf.butler.DataCoordinate` base instance, or 97 a complete `~lsst.daf.butler.ExpandedDataCoordinate`. 100 files: List[RawFileData]
101 """List of structures containing file-level information. 104 records: Optional[Dict[str, List[DimensionRecord]]] =
None 105 """Dictionary containing `DimensionRecord` instances that must be inserted 106 into the `~lsst.daf.butler.Registry` prior to file-level ingest (`dict`). 108 Keys are the names of dimension elements ("exposure" and optionally "visit" 109 and "visit_detector_region"), while values are lists of `DimensionRecord`. 111 May be `None` during some ingest steps. 116 """Create a Config field with options for how to transfer files between 119 The allowed options for the field are exactly those supported by 120 `lsst.daf.butler.Datastore.ingest`. 125 Documentation for the configuration field. 129 field : `lsst.pex.config.ChoiceField` 135 allowed={
"move":
"move",
137 "hardlink":
"hard link",
138 "symlink":
"symbolic (soft) link"},


class RawIngestConfig(Config):
    transfer = makeTransferChoiceField()
    padRegionAmount = Field(
        dtype=int,
        default=0,
        doc="Pad an image with specified number of pixels before calculating region",
    )
    instrument = Field(
        dtype=str,
        doc=("Fully-qualified Python name of the `Instrument` subclass to "
             "associate with all raws."),
    )
161 """Driver Task for ingesting raw data into Gen3 Butler repositories. 163 This Task is intended to be runnable from the command-line, but it doesn't 164 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 165 gain much from being one. It also wouldn't really be appropriate as a 166 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 167 leverage the logging and configurability functionality that provides. 169 Each instance of `RawIngestTask` writes to the same Butler. Each 170 invocation of `RawIngestTask.run` ingests a list of files. 174 config : `RawIngestConfig` 175 Configuration for the task. 176 butler : `~lsst.daf.butler.Butler` 177 Butler instance. Ingested Datasets will be created as part of 178 ``butler.run`` and associated with its Collection. 180 Additional keyword arguments are forwarded to the `lsst.pipe.base.Task` 183 Other keyword arguments are forwarded to the Task base class constructor. 186 ConfigClass = RawIngestConfig
188 _DefaultName =
"ingest" 191 """Return the DatasetType of the Datasets ingested by this Task. 193 return DatasetType(
"raw", (
"instrument",
"detector",
"exposure"),
"Exposure",
194 universe=self.
butler.registry.dimensions)

    def __init__(self, config: Optional[RawIngestConfig] = None, *, butler: Butler, **kwds: Any):
        super().__init__(config, **kwds)
        self.butler = butler
        self.universe = self.butler.registry.dimensions
        # The attributes below are relied upon by the methods that follow.
        # Instantiating the Instrument via ``doImport`` is an assumption
        # based on the string-valued ``instrument`` config field.
        self.instrument = doImport(self.config.instrument)()
        self.camera = self.instrument.getCamera()
        self.datasetType = self.getDatasetType()
209 """Extract and process metadata from a single raw file. 219 A structure containing the metadata extracted from the file, 220 as well as the original filename. All fields will be populated, 221 but the `RawFileData.dataId` attribute will be a minimal 222 (unexpanded) `DataCoordinate` instance. 224 phdu = readMetadata(filename, 0)
        header = merge_headers([phdu, readMetadata(filename)], mode="overwrite")
        fix_header(header)
        obsInfo = ObservationInfo(header)
        dataId = DataCoordinate.standardize(instrument=obsInfo.instrument,
                                            exposure=obsInfo.exposure_id,
                                            detector=obsInfo.detector_num,
                                            universe=self.universe)
        if obsInfo.instrument != self.instrument.getName():
            raise ValueError(f"Incorrect instrument (expected {self.instrument.getName()}, "
                             f"got {obsInfo.instrument}) for file {filename}.")
        FormatterClass = self.instrument.getRawFormatter(dataId)
        if obsInfo.visit_id is not None and obsInfo.tracking_radec is not None:
            formatter = FormatterClass.fromMetadata(metadata=header, obsInfo=obsInfo)
            visitInfo = formatter.makeVisitInfo()
            detector = self.camera[obsInfo.detector_num]
            wcs = formatter.makeWcs(visitInfo, detector)
            # Pad in pixel space (if configured), then map the bounding box
            # corners to the sky to build the detector's region.
            pixBox = Box2D(detector.getBBox())
            if self.config.padRegionAmount > 0:
                pixBox.grow(self.config.padRegionAmount)
            pixCorners = pixBox.getCorners()
            sphCorners = [wcs.pixelToSky(point).getVector() for point in pixCorners]
            region = ConvexPolygon(sphCorners)
        else:
            region = None
        return RawFileData(obsInfo=obsInfo, region=region, filename=filename,
                           FormatterClass=FormatterClass, dataId=dataId)
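
    @staticmethod
    def _demoMergeHeaders():
        # Illustrative sketch (not part of the original API): with
        # ``mode="overwrite"``, later headers win, which is why the full-file
        # header read in extractMetadata takes precedence over the primary
        # HDU for duplicated keys.  Plain dicts stand in for FITS headers.
        primary = {"TELESCOP": "Example", "EXPTIME": 30.0}
        extension = {"EXPTIME": 29.98, "DETECTOR": 42}
        merged = merge_headers([primary, extension], mode="overwrite")
        assert merged["EXPTIME"] == 29.98  # later header overwrites
        assert merged["TELESCOP"] == "Example"  # unique keys are kept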
253 """Group an iterable of `RawFileData` by exposure. 257 files : iterable of `RawFileData` 258 File-level information to group. 262 exposures : `list` of `RawExposureData` 263 A list of structures that group the file-level information by 264 exposure. The `RawExposureData.records` attributes of elements 265 will be `None`, but all other fields will be populated. The 266 `RawExposureData.dataId` attributes will be minimal (unexpanded) 267 `DataCoordinate` instances. 269 exposureDimensions = self.
universe[
"exposure"].graph
270 byExposure = defaultdict(list)
272 byExposure[f.dataId.subset(exposureDimensions)].append(f)
275 for dataId, exposureFiles
in byExposure.items()]
278 """Collect the `DimensionRecord` instances that must be inserted into 279 the `~lsst.daf.butler.Registry` before an exposure's raw files may be. 283 exposure : `RawExposureData` 284 A structure containing information about the exposure to be 285 ingested. Should be considered consumed upon return. 289 exposure : `RawExposureData` 290 An updated version of the input structure, with 291 `RawExposureData.records` populated. 293 firstFile = exposure.files[0]
294 VisitDetectorRegionRecordClass = self.
universe[
"visit_detector_region"].RecordClass
        if firstFile.obsInfo.visit_id is not None:
            exposure.records["visit_detector_region"] = []
            visitVertices = []
            for file in exposure.files:
                if file.obsInfo.visit_id != firstFile.obsInfo.visit_id:
                    raise ValueError(f"Inconsistent visit/exposure relationship for "
                                     f"exposure {firstFile.obsInfo.exposure_id} between "
                                     f"{file.filename} and {firstFile.filename}: "
                                     f"{file.obsInfo.visit_id} != {firstFile.obsInfo.visit_id}.")
                if file.region is None:
                    self.log.warn("No region found for visit=%s, detector=%s.",
                                  file.obsInfo.visit_id, file.obsInfo.detector_num)
                    continue
                visitVertices.extend(file.region.getVertices())
                exposure.records["visit_detector_region"].append(
                    VisitDetectorRegionRecordClass.fromDict({
                        "instrument": file.obsInfo.instrument,
                        "visit": file.obsInfo.visit_id,
                        "detector": file.obsInfo.detector_num,
                        "region": file.region,
                    })
                )
            if visitVertices:
                visitRegion = ConvexPolygon(visitVertices)
            else:
                self.log.warn("No region found for visit=%s.", firstFile.obsInfo.visit_id)
                visitRegion = None
            exposure.records["visit"] = [
                makeVisitRecordFromObsInfo(firstFile.obsInfo, self.universe, region=visitRegion)
            ]
        return exposure
332 """Expand the data IDs associated with a raw exposure to include 333 additional metadata records. 337 exposure : `RawExposureData` 338 A structure containing information about the exposure to be 339 ingested. Must have `RawExposureData.records` populated. Should 340 be considered consumed upon return. 344 exposure : `RawExposureData` 345 An updated version of the input structure, with 346 `RawExposureData.dataId` and nested `RawFileData.dataId` attributes 347 containing `~lsst.daf.butler.ExpandedDataCoordinate` instances. 349 hasVisit =
"visit" in data.records
353 data.dataId = self.
butler.registry.expandDataId(
360 "exposure": data.records[
"exposure"][0],
361 "visit": data.records[
"visit"][0]
if hasVisit
else None,
        vdrRecords = data.records["visit_detector_region"] if hasVisit else itertools.repeat(None)
        for file, vdrRecord in zip(data.files, vdrRecords):
            file.dataId = self.butler.registry.expandDataId(
                file.dataId,
                records=dict(data.dataId.records, visit_detector_region=vdrRecord)
            )
        return data
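
    @staticmethod
    def _demoVdrPairing():
        # Illustrative sketch (not part of the original API): an unbounded
        # ``itertools.repeat(None)`` lets the ``zip`` in expandDataIds pair
        # every file with a placeholder record when there is no visit,
        # regardless of how many files the exposure has.
        pairs = list(zip(["a.fits", "b.fits"], itertools.repeat(None)))
        assert pairs == [("a.fits", None), ("b.fits", None)]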

    def prep(self, files, pool: Optional[Pool] = None, processes: int = 1) -> Iterator[RawExposureData]:
        """Perform all ingest preprocessing steps that do not involve actually
        modifying the database.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use.  Ignored if ``pool`` is not `None`.

        Yields
        ------
        exposure : `RawExposureData`
            Data structures containing dimension records, filenames, and data
            IDs to be ingested (one structure for each exposure).
        """
        if pool is None and processes > 1:
            pool = Pool(processes)
        mapFunc = map if pool is None else pool.imap_unordered
        # Assumed composition of the steps above: extract per-file metadata
        # (optionally in parallel), group by exposure, then attach dimension
        # records and expanded data IDs to each group.
        fileData = mapFunc(self.extractMetadata, files)
        for exposure in self.groupByExposure(fileData):
            yield self.expandDataIds(self.collectDimensionRecords(exposure))
433 """Insert dimension records for one or more exposures. 437 records : `dict` mapping `str` to `list` 438 Dimension records to be inserted, organized as a mapping from 439 dimension name to a list of records for that dimension. This 440 may be a single `RawExposureData.records` dict, or an aggregate 441 for multiple exposures created by concatenating the value lists 442 of those dictionaries. 446 refs : `list` of `lsst.daf.butler.DatasetRef` 447 Dataset references for ingested raws. 456 for dimension
in (
"visit",
"exposure",
"visit_detector_region"):
457 recordsForDimension = records.get(dimension)
458 if recordsForDimension:
463 self.
butler.registry.insertDimensionData(dimension, *recordsForDimension)
466 ) -> List[DatasetRef]:
467 """Ingest all raw files in one exposure. 471 exposure : `RawExposureData` 472 A structure containing information about the exposure to be 473 ingested. Must have `RawExposureData.records` populated and all 474 data ID attributes expanded. 475 butler : `lsst.daf.butler.Butler`, optional 476 Butler to use for ingest. If not provided, ``self.butler`` will 481 refs : `list` of `lsst.daf.butler.DatasetRef` 482 Dataset references for ingested raws. 486 datasets = [FileDataset(path=os.path.abspath(file.filename),
488 formatter=file.FormatterClass)
489 for file
in exposure.files]
490 butler.ingest(*datasets, transfer=self.config.transfer)
491 return [ref
for dataset
in datasets
for ref
in dataset.refs]

    def run(self, files, pool: Optional[Pool] = None, processes: int = 1):
        """Ingest files into a Butler data repository.

        This creates any new exposure or visit Dimension entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed instrument, detector, and physical_filter Dimension entries
        must exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use.  Ignored if ``pool`` is not `None`.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.

        Notes
        -----
        This method inserts all records (dimensions and datasets) for an
        exposure within a transaction, guaranteeing that partial exposures
        are never ingested.
        """
        exposureData = self.prep(files, pool=pool, processes=processes)
        refs = []
        for exposure in exposureData:
            with self.butler.transaction():
                # Assumed transaction body, following the Notes section above:
                # insert this exposure's dimension records, then ingest its
                # files, all or nothing.
                self.insertDimensionData(exposure.records)
                refs.extend(self.ingestExposureDatasets(exposure))
        return refs
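

# End-to-end usage sketch (illustrative; the repository path, run name, glob
# pattern, and Instrument class name below are all hypothetical):
def _exampleIngest():
    import glob

    butler = Butler("/path/to/repo", run="raw/example")
    config = RawIngestConfig()
    config.transfer = "symlink"
    config.padRegionAmount = 10
    config.instrument = "my_obs_package.ExampleInstrument"
    task = RawIngestTask(config=config, butler=butler)
    refs = task.run(sorted(glob.glob("/path/to/raws/*.fits")), processes=4)
    print(f"Ingested {len(refs)} raw datasets.")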


def makeExposureRecordFromObsInfo(obsInfo, universe):
    """Construct an exposure `DimensionRecord` from an
    `~astro_metadata_translator.ObservationInfo`.
    """
    raise NotImplementedError("Body not included here.")


def makeVisitRecordFromObsInfo(obsInfo, universe, region=None):
    """Construct a visit `DimensionRecord` from an
    `~astro_metadata_translator.ObservationInfo`, optionally attaching a
    precomputed sky region.
    """
    raise NotImplementedError("Body not included here.")