__all__ = ("RawIngestTask", "RawIngestConfig", "makeTransferChoiceField")
import itertools
import os

from dataclasses import dataclass
from typing import List, Dict, Iterator, Iterable, Type, Optional, Any, Mapping
from collections import defaultdict
from multiprocessing import Pool

from astro_metadata_translator import ObservationInfo, fix_header, merge_headers

from lsst.afw.fits import readMetadata
from lsst.daf.butler import (
    Butler,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    DimensionRecord,
)
from lsst.daf.butler.instrument import makeExposureRecordFromObsInfo, makeVisitRecordFromObsInfo
from lsst.geom import Box2D
from lsst.pex.config import Config, Field, ChoiceField
from lsst.pipe.base import Task
from lsst.sphgeom import ConvexPolygon

from .fitsRawFormatterBase import FitsRawFormatterBase
53 """Structure that holds information about a single raw file, used during 57 dataId: DataCoordinate
58 """Data ID for this file (`lsst.daf.butler.DataCoordinate`). 60 This may be a minimal `~lsst.daf.butler.DataCoordinate` base instance, or 61 a complete `~lsst.daf.butler.ExpandedDataCoordinate`. 64 obsInfo: ObservationInfo
65 """Standardized observation metadata extracted directly from the file 66 headers (`astro_metadata_translator.ObservationInfo`). 70 """Region on the sky covered by this file, possibly with padding 71 (`lsst.sphgeom.ConvexPolygon`). 75 """Name of the file this information was extracted from (`str`). 77 This is the path prior to ingest, not the path after ingest. 80 FormatterClass: Type[FitsRawFormatterBase]
81 """Formatter class that should be used to ingest this file and compute 82 a spatial region for it (`type`; as subclass of `FitsRawFormatterBase`). 88 """Structure that holds information about a complete raw exposure, used 92 dataId: DataCoordinate
93 """Data ID for this exposure (`lsst.daf.butler.DataCoordinate`). 95 This may be a minimal `~lsst.daf.butler.DataCoordinate` base instance, or 96 a complete `~lsst.daf.butler.ExpandedDataCoordinate`. 99 files: List[RawFileData]
100 """List of structures containing file-level information. 103 records: Optional[Dict[str, List[DimensionRecord]]] =
None 104 """Dictionary containing `DimensionRecord` instances that must be inserted 105 into the `~lsst.daf.butler.Registry` prior to file-level ingest (`dict`). 107 Keys are the names of dimension elements ("exposure" and optionally "visit" 108 and "visit_detector_region"), while values are lists of `DimensionRecord`. 110 May be `None` during some ingest steps. 115 """Create a Config field with options for how to transfer files between 118 The allowed options for the field are exactly those supported by 119 `lsst.daf.butler.Datastore.ingest`. 124 Documentation for the configuration field. 128 field : `lsst.pex.config.ChoiceField` 134 allowed={
"move":
"move",
136 "hardlink":
"hard link",
137 "symlink":
"symbolic (soft) link"},
class RawIngestConfig(Config):
    transfer = makeTransferChoiceField()
    padRegionAmount = Field(
        dtype=int,
        default=0,
        doc="Pad an image with specified number of pixels before calculating region",
    )
    instrument = Field(
        dtype=str,
        doc=("Fully-qualified Python name of the `Instrument` subclass to "
             "associate with all raws."),
    )
160 """Driver Task for ingesting raw data into Gen3 Butler repositories. 162 This Task is intended to be runnable from the command-line, but it doesn't 163 meet the other requirements of CmdLineTask or PipelineTask, and wouldn't 164 gain much from being one. It also wouldn't really be appropriate as a 165 subtask of a CmdLineTask or PipelineTask; it's a Task essentially just to 166 leverage the logging and configurability functionality that provides. 168 Each instance of `RawIngestTask` writes to the same Butler. Each 169 invocation of `RawIngestTask.run` ingests a list of files. 173 config : `RawIngestConfig` 174 Configuration for the task. 175 butler : `~lsst.daf.butler.Butler` 176 Butler instance. Ingested Datasets will be created as part of 177 ``butler.run`` and associated with its Collection. 179 Additional keyword arguments are forwarded to the `lsst.pipe.base.Task` 182 Other keyword arguments are forwarded to the Task base class constructor. 185 ConfigClass = RawIngestConfig
187 _DefaultName =
"ingest" 190 """Return the DatasetType of the Datasets ingested by this Task. 192 return DatasetType(
"raw", (
"instrument",
"detector",
"exposure"),
"Exposure",
193 universe=self.
butler.registry.dimensions)
    def __init__(self, config: Optional[RawIngestConfig] = None, *, butler: Butler, **kwds: Any):
208 """Extract and process metadata from a single raw file. 218 A structure containing the metadata extracted from the file, 219 as well as the original filename. All fields will be populated, 220 but the `RawFileData.dataId` attribute will be a minimal 221 (unexpanded) `DataCoordinate` instance. 223 phdu = readMetadata(filename, 0)
224 header = merge_headers([phdu, readMetadata(filename)], mode=
"overwrite")
226 obsInfo = ObservationInfo(header)
227 dataId = DataCoordinate.standardize(instrument=obsInfo.instrument,
228 exposure=obsInfo.exposure_id,
229 detector=obsInfo.detector_num,
231 if obsInfo.instrument != self.
instrument.getName():
232 raise ValueError(f
"Incorrect instrument (expected {self.instrument.getName()}, " 233 f
"got {obsInfo.instrument}) for file {filename}.")
234 FormatterClass = self.
instrument.getRawFormatter(dataId)
235 if obsInfo.visit_id
is not None and obsInfo.tracking_radec
is not None:
236 formatter = FormatterClass.fromMetadata(metadata=header, obsInfo=obsInfo)
237 visitInfo = formatter.makeVisitInfo()
238 detector = self.
camera[obsInfo.detector_num]
239 wcs = formatter.makeWcs(visitInfo, detector)
240 pixBox = Box2D(detector.getBBox())
241 if self.config.padRegionAmount > 0:
242 pixBox.grow(self.config.padRegionAmount)
243 pixCorners = pixBox.getCorners()
244 sphCorners = [wcs.pixelToSky(point).getVector()
for point
in pixCorners]
245 region = ConvexPolygon(sphCorners)
248 return RawFileData(obsInfo=obsInfo, region=region, filename=filename,
249 FormatterClass=FormatterClass, dataId=dataId)
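    # Sketch of the region calculation performed above for a single detector
    # (``wcs`` and ``detector`` stand in for the objects produced by the
    # formatter and camera; the padding value is illustrative):
    #
    #   pixBox = Box2D(detector.getBBox())   # detector bounding box in pixels
    #   pixBox.grow(50)                      # optional padding, config.padRegionAmount
    #   vertices = [wcs.pixelToSky(corner).getVector() for corner in pixBox.getCorners()]
    #   region = ConvexPolygon(vertices)     # sky polygon stored in RawFileData.region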
252 """Group an iterable of `RawFileData` by exposure. 256 files : iterable of `RawFileData` 257 File-level information to group. 261 exposures : `list` of `RawExposureData` 262 A list of structures that group the file-level information by 263 exposure. The `RawExposureData.records` attributes of elements 264 will be `None`, but all other fields will be populated. The 265 `RawExposureData.dataId` attributes will be minimal (unexpanded) 266 `DataCoordinate` instances. 268 exposureDimensions = self.
universe[
"exposure"].graph
269 byExposure = defaultdict(list)
271 byExposure[f.dataId.subset(exposureDimensions)].append(f)
274 for dataId, exposureFiles
in byExposure.items()]
277 """Collect the `DimensionRecord` instances that must be inserted into 278 the `~lsst.daf.butler.Registry` before an exposure's raw files may be. 282 exposure : `RawExposureData` 283 A structure containing information about the exposure to be 284 ingested. Should be considered consumed upon return. 288 exposure : `RawExposureData` 289 An updated version of the input structure, with 290 `RawExposureData.records` populated. 292 firstFile = exposure.files[0]
293 VisitDetectorRegionRecordClass = self.
universe[
"visit_detector_region"].RecordClass
295 "exposure": [makeExposureRecordFromObsInfo(firstFile.obsInfo, self.
universe)],
297 if firstFile.obsInfo.visit_id
is not None:
298 exposure.records[
"visit_detector_region"] = []
300 for file
in exposure.files:
301 if file.obsInfo.visit_id != firstFile.obsInfo.visit_id:
302 raise ValueError(f
"Inconsistent visit/exposure relationship for " 303 f
"exposure {firstFile.obsInfo.exposure_id} between " 304 f
"{file.filename} and {firstFile.filename}: " 305 f
"{file.obsInfo.visit_id} != {firstFile.obsInfo.visit_id}.")
306 if file.region
is None:
307 self.log.warn(
"No region found for visit=%s, detector=%s.", file.obsInfo.visit_id,
308 file.obsInfo.detector_num)
310 visitVertices.extend(file.region.getVertices())
311 exposure.records[
"visit_detector_region"].append(
312 VisitDetectorRegionRecordClass.fromDict({
313 "instrument": file.obsInfo.instrument,
314 "visit": file.obsInfo.visit_id,
315 "detector": file.obsInfo.detector_num,
316 "region": file.region,
320 visitRegion = ConvexPolygon(visitVertices)
322 self.log.warn(
"No region found for visit=%s.", file.obsInfo.visit_id,
323 file.obsInfo.detector_num)
325 exposure.records[
"visit"] = [
326 makeVisitRecordFromObsInfo(firstFile.obsInfo, self.
universe, region=visitRegion)
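    # Shape of the ``records`` dict produced above (values are illustrative):
    #
    #   {
    #       "exposure": [<exposure DimensionRecord>],
    #       "visit": [<visit DimensionRecord carrying the union region>],
    #       "visit_detector_region": [<one DimensionRecord per detector/file>],
    #   }
    #
    # The "visit" and "visit_detector_region" keys are present only when the
    # observation defines a visit.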
331 """Expand the data IDs associated with a raw exposure to include 332 additional metadata records. 336 exposure : `RawExposureData` 337 A structure containing information about the exposure to be 338 ingested. Must have `RawExposureData.records` populated. Should 339 be considered consumed upon return. 343 exposure : `RawExposureData` 344 An updated version of the input structure, with 345 `RawExposureData.dataId` and nested `RawFileData.dataId` attributes 346 containing `~lsst.daf.butler.ExpandedDataCoordinate` instances. 348 hasVisit =
"visit" in data.records
352 data.dataId = self.
butler.registry.expandDataId(
359 "exposure": data.records[
"exposure"][0],
360 "visit": data.records[
"visit"][0]
if hasVisit
else None,
367 vdrRecords = data.records[
"visit_detector_region"]
if hasVisit
else itertools.repeat(
None)
368 for file, vdrRecord
in zip(data.files, vdrRecords):
369 file.dataId = self.
butler.registry.expandDataId(
371 records=dict(data.dataId.records, visit_detector_region=vdrRecord)
    def prep(self, files, pool: Optional[Pool] = None, processes: int = 1) -> Iterator[RawExposureData]:
        """Perform all ingest preprocessing steps that do not involve actually
        modifying the database.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            steps.
        processes : `int`, optional
            The number of processes to use.  Ignored if ``pool`` is not `None`.

        Yields
        ------
        exposure : `RawExposureData`
            Data structures containing dimension records, filenames, and data
            IDs to be ingested (one structure for each exposure).
        """
        if pool is None and processes > 1:
            pool = Pool(processes)
        mapFunc = map if pool is None else pool.imap_unordered
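    # Sketch of supplying an existing pool to ``prep`` instead of letting it
    # create one (file names are illustrative; the caller then owns the pool):
    #
    #   with Pool(4) as pool:
    #       for exposure in task.prep(["raw_0001.fits", "raw_0002.fits"], pool=pool):
    #           ...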
432 """Insert dimension records for one or more exposures. 436 records : `dict` mapping `str` to `list` 437 Dimension records to be inserted, organized as a mapping from 438 dimension name to a list of records for that dimension. This 439 may be a single `RawExposureData.records` dict, or an aggregate 440 for multiple exposures created by concatenating the value lists 441 of those dictionaries. 445 refs : `list` of `lsst.daf.butler.DatasetRef` 446 Dataset references for ingested raws. 455 for dimension
in (
"visit",
"exposure",
"visit_detector_region"):
456 recordsForDimension = records.get(dimension)
457 if recordsForDimension:
462 self.
butler.registry.insertDimensionData(dimension, *recordsForDimension)
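    # Sketch of aggregating per-exposure record dicts into a single call, as
    # the docstring above describes (``exposures`` is an illustrative list of
    # RawExposureData whose records have already been collected):
    #
    #   combined = defaultdict(list)
    #   for exposure in exposures:
    #       for name, recordList in exposure.records.items():
    #           combined[name].extend(recordList)
    #   task.insertDimensionData(combined)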
    def ingestExposureDatasets(self, exposure: RawExposureData, *, butler: Optional[Butler] = None
                               ) -> List[DatasetRef]:
        """Ingest all raw files in one exposure.

        Parameters
        ----------
        exposure : `RawExposureData`
            A structure containing information about the exposure to be
            ingested.  Must have `RawExposureData.records` populated and all
            data ID attributes expanded.
        butler : `lsst.daf.butler.Butler`, optional
            Butler to use for ingest.  If not provided, ``self.butler`` will
            be used.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.
        """
        if butler is None:
            butler = self.butler
        refs = []
        for file in exposure.files:
            path = os.path.abspath(file.filename)
            ref = butler.ingest(path, self.datasetType, file.dataId,
                                transfer=self.config.transfer,
                                formatter=file.FormatterClass)
            refs.append(ref)
        return refs
    def run(self, files, pool: Optional[Pool] = None, processes: int = 1):
        """Ingest files into a Butler data repository.

        This creates any new exposure or visit Dimension entries needed to
        identify the ingested files, creates new Dataset entries in the
        Registry and finally ingests the files themselves into the Datastore.
        Any needed instrument, detector, and physical_filter Dimension entries
        must exist in the Registry before `run` is called.

        Parameters
        ----------
        files : iterable over `str` or path-like objects
            Paths to the files to be ingested.  Will be made absolute
            if they are not already.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            steps.
        processes : `int`, optional
            The number of processes to use.  Ignored if ``pool`` is not `None`.

        Returns
        -------
        refs : `list` of `lsst.daf.butler.DatasetRef`
            Dataset references for ingested raws.

        Notes
        -----
        This method inserts all records (dimensions and datasets) for an
        exposure within a transaction, guaranteeing that partial exposures
        are never ingested.
        """
        exposureData = self.prep(files, pool=pool, processes=processes)
        refs = []
        for exposure in exposureData:
            with self.butler.transaction():
                self.insertDimensionData(exposure.records)
                refs.extend(self.ingestExposureDatasets(exposure))
        return refs
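# A minimal end-to-end usage sketch (repository path and file list are
# illustrative; assumes the instrument, detector, and physical_filter
# dimension entries already exist in the registry):
#
#   butler = Butler("/path/to/repo", run="raw")
#   config = RawIngestConfig()
#   config.transfer = "symlink"
#   task = RawIngestTask(config=config, butler=butler)
#   refs = task.run(["raw_0001.fits", "raw_0002.fits"], processes=4)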