23 __all__ = (
"BootstrapRepoConfig",
"BootstrapRepoTask",
"BootstrapRepoInputs",
24 "BootstrapRepoSkyMapConfig",
"BootstrapRepoRefCatConfig")
27 from dataclasses
import dataclass
28 from typing
import List
31 from lsst
import sphgeom
32 from lsst.daf.butler
import Butler, DatasetType
33 from lsst.daf.butler.instrument
import Instrument
34 from lsst.pex.config
import Config, Field, ConfigurableField, ConfigDictField, ConfigField
35 from lsst.pipe.base
import Task
37 from lsst.skymap
import skyMapRegistry
38 from lsst.meas.algorithms
import DatasetConfig
40 from .repoConverter
import RepoConverter
41 from .calibRepoConverter
import CalibRepoConverter
# Name of the DatasetType the SkyMap object is stored under; per the help
# text, None means only the Registry is updated.
datasetTypeName = Field(
    ("DatasetType used to write the SkyMap instance. If None, the instance will "
     "not be written, and only the Registry will be modified."),
    dtype=str,
    default="deepCoadd_skyMap",
    optional=True,
)
# Output collection for the SkyMap dataset; per the help text, None falls
# back to the collection the butler was initialized with.
collection = Field(
    ("Butler collection the SkyMap instance should be written to. If None, the "
     "collection used to initialize the butler will be used."),
    dtype=str,
    default="skymaps",
    optional=True,
)
51 skyMap = skyMapRegistry.makeField(
52 doc=
"Type and parameters for the SkyMap itself.",
# NOTE(review): the ingest code visible in this file spells the dataset type
# name "ref_cat" literally rather than reading this field -- confirm whether
# this option is actually honoured.
datasetTypeName = Field(
    "DatasetType used to write the catalog shards.",
    dtype=str,
    default="ref_cat",
)
# When True, only shards matching already-ingested visit regions are ingested.
filterByRawRegions = Field(
    ("If True, do not ingest shards that do not overlap visits. "
     "Does not guarantee that all ingested shards will overlap a visit."),
    dtype=bool,
    default=True,
)
# Output collection template; '{name}' is filled with the catalog's key in
# the configuration dictionary.
collection = Field(
    ("Butler collection the reference catalog should be written to. If None, the "
     "collection used to initialize the butler will be used. May also be a string with "
     "the format placeholder '{name}', which will be replaced with the reference "
     "catalog name (i.e. the key of the configuration dictionary)."),
    dtype=str,
    default="refcats/{name}",
    optional=True,
)
# Output collection for ingested datasets; per the help text, None falls
# back to the collection the butler was initialized with.
collection = Field(
    ("Butler collection that datasets should be ingested into. "
     "If None, the collection used to initialize the butler will be used."),
    dtype=str,
    default=None,
    optional=True,
)
# Declared with default=None and optional=False, so it presumably has to be
# set explicitly before the configuration validates -- TODO confirm.
skymap = Field(
    "SkyMap dimension name used to define the tracts and patches for bright object masks.",
    dtype=str,
    default=None,
    optional=False,
)
# When True, only files for tracts matching already-ingested visits are ingested.
filterByRawRegions = Field(
    ("If True, do not ingest files that do not overlap visits. "
     "Does not guarantee that all ingested files will overlap a visit."),
    dtype=bool,
    default=True,
)
# Subtask that performs raw ingestion; retargetable via the usual
# ConfigurableField mechanism.
raws = ConfigurableField(
    target=RawIngestTask,
    doc=("Configuration for subtask responsible for ingesting raws and adding "
         "visit and exposure dimension entries."),
)
90 skymaps = ConfigDictField(doc=(
"SkyMap definitions to register and ingest into the repo, keyed by " 91 "skymap dimension name."),
93 itemtype=BootstrapRepoSkyMapConfig,
95 refCats = ConfigDictField(doc=(
"Reference catalogs to ingest into the repo, keyed by their subdirectory " 96 "within the overall reference catalog root."),
98 itemtype=BootstrapRepoRefCatConfig,
# Settings for the bright-object-mask ingest step.
brightObjectMasks = ConfigField(
    doc="Configuration for ingesting brightObjectMask files.",
    dtype=BootstrapRepoBrightObjectMasksConfig,
)
# Settings for the master-calibration ingest step.
calibrations = ConfigField(
    doc="Configuration for ingesting and creating master calibration products.",
    dtype=BootstrapRepoGenericIngestConfig,
)
106 self.
raws.transfer =
"symlink" 111 """Simple struct that aggregates all non-config inputs to 114 Generally, this stuct contains inputs that depend on the organization 115 of the input files on a particular system, while the config includes 116 everything else. The exception is the ``instrument`` attribute, which 117 cannot be included in the config because it's expected that driver code 118 will actually use it (via 119 `~lsst.daf.butler.instrument.Instrument.applyConfigOverrides`) to define 123 instrument: Instrument
124 """Instrument subclass instance for the raws and calibrations to be 125 included in the initial repo. 129 """List of filenames for raw files to ingest (complete paths). 133 """Root of the directory containing the reference catalogs, with immediate 134 subdirectories that correspond to different reference catalogs. 137 brightObjectMaskRoot: str
138 """Root of the Gen2 repository containing bright object masks. 142 """Root of the Gen2 calibraion repository containing flats, biases, 148 """A Task that populates a Gen3 repo with the minimum content needed to 149 run the DRP pipelines. 151 BootstrapRepoTask currently relies on Gen2 data repository information 152 for both bright object masks and master calibrations, but nothing else; 153 unlike dedicated Gen2->Gen3 conversion code, it will be updated in the 154 future as more pure-Gen3 approaches become available. 156 Like other Gen3 Tasks that are not PipelineTasks, BootstrapRepoTask does 157 not yet have a dedicated, general-purpose command-line driver. At least 158 for now, it is instead expected that custom driver scripts will be written 159 for different contexts and predefined datasets. 163 config : `BootstrapRepoConfig` 164 Configuration for the task. 165 butler : `lsst.daf.butler.Butler` 166 Gen3 Butler defining the repository to populate. New butlers with 167 different output collections will be created as necessary from this 168 butler to match the output collections defined in the configuration. 170 Additional keyword arguments are forwarded to the 171 `lsst.pipe.base.Task` constructor. 174 ConfigClass = BootstrapRepoConfig
176 _DefaultName =
"bootstrapRepo" 178 def __init__(self, config=None, *, butler, **kwds):
181 self.makeSubtask(
"raws", butler=self.
butler)
def getButler(self, collection=None):
    """Create a new butler that writes into the given collection.

    Parameters
    ----------
    collection : `str`, optional
        The new output collection.  If `None`, ``self.butler`` is returned
        directly.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        Butler instance pointing at the same repository as
        ``self.butler``, but possibly a different collection.
    """
    if collection is None:
        # No override requested: hand back the butler the task was built
        # with instead of falling off the end and returning None.
        return self.butler
    return Butler(butler=self.butler, run=collection)
204 """Run all steps involved in populating the new repository. 208 inputs : `BootstrapRepoInputs` 209 Filenames and paths for the data to be ingested. 219 """Add an instrument, associated metadata, and human-curated 220 calibrations to the repository. 224 instrument : `lsst.daf.butler.instrument.Instrument` 225 Instrument class that defines detectors, physical filters, and 226 curated calibrations to ingest. 228 self.log.info(
"Registering instrument '%s' and adding curated calibrations.", instrument.getName())
229 with self.
butler.transaction():
230 instrument.register(self.
butler.registry)
231 instrument.writeCuratedCalibrations(self.
getButler(self.config.calibrations.collection))
def bootstrapSkyMaps(self):
    """Add configured SkyMaps to the repository.

    This both registers skymap dimension entries (the skymap, tract, and
    patch tables, and their associated join tables) and adds a
    ``<something>Coadd_skyMap`` dataset.

    Each constructed SkyMap is also stored in ``self.skyMaps`` under its
    configuration key, because `bootstrapBrightObjectMasks` looks the
    SkyMap up there by name.
    """
    registry = self.butler.registry
    for name, config in self.config.skymaps.items():
        self.log.info("Registering skymap '%s'.", name)
        with self.butler.transaction():
            skyMap = config.skyMap.apply()
            skyMap.register(name, registry)
            if config.datasetTypeName is not None:
                # Only persist the SkyMap object itself when a dataset type
                # name is configured; otherwise the Registry entries suffice.
                datasetType = DatasetType(config.datasetTypeName, dimensions=["skymap"],
                                          storageClass="SkyMap",
                                          universe=registry.dimensions)
                registry.registerDatasetType(datasetType)
                self.getButler(config.collection).put(skyMap, datasetType, skymap=name)
        # Assumes ``self.skyMaps`` is a dict created by the constructor --
        # TODO confirm against ``__init__``.
        self.skyMaps[name] = skyMap
def bootstrapRaws(self, files):
    """Ingest raw images.

    This step must be run after `bootstrapInstrument`, but may be run
    multiple times with different arguments (which may be overlapping if
    the nested `RawIngestTask` is configured to ignore duplicates).

    Parameters
    ----------
    files : sequence of `str`
        The complete path names of the files to be ingested.

    Returns
    -------
    result
        Whatever the ``raws`` subtask's ``run`` method returns, passed
        through unchanged.
    """
    self.log.info("Ingesting raw images.")
    return self.raws.run(files)
def computeRawSkyPixels(self):
    """Compute and return the skypix dimension entries that overlap
    already-ingested visits.

    Returns
    -------
    skypix : `list`
        Distinct values of the ``skypix`` column of ``visit_skypix_join``.
    """
    rows = self.butler.registry.query("SELECT DISTINCT skypix FROM visit_skypix_join")
    return [row["skypix"] for row in rows]


def bootstrapRefCats(self, root):
    """Ingest reference catalogs.

    This step must be run after `bootstrapRaws` if the
    ``filterByRawRegions`` config option is `True` for any reference
    catalog.

    Parameters
    ----------
    root : `str`
        Root of the directory containing the reference catalogs, with
        immediate subdirectories that correspond to different reference
        catalogs.

    Raises
    ------
    ValueError
        Raised if a catalog's on-disk configuration does not use the "HTM"
        indexer, if the Registry pixelization is not an
        `lsst.sphgeom.HtmPixelization`, or if the two HTM levels differ.
    """
    if not self.config.refCats:
        return
    # Only query the visit/skypix overlaps when at least one catalog needs them.
    if any(config.filterByRawRegions for config in self.config.refCats.values()):
        rawSkyPixels = self.computeRawSkyPixels()
    else:
        rawSkyPixels = ()
    registry = self.butler.registry
    # NOTE(review): the dataset type name is fixed here; the per-catalog
    # ``datasetTypeName`` option is not consulted -- confirm that is intended.
    datasetType = DatasetType("ref_cat", dimensions=["skypix"], storageClass="SimpleCatalog",
                              universe=registry.dimensions)
    registry.registerDatasetType(datasetType)
    for name, config in self.config.refCats.items():
        self.log.info("Ingesting reference catalog '%s'.", name)
        with self.butler.transaction():
            onDiskConfig = DatasetConfig()
            onDiskConfig.load(os.path.join(root, name, "config.py"))
            if onDiskConfig.indexer.name != "HTM":
                raise ValueError(f"Reference catalog '{name}' uses unsupported "
                                 f"pixelization '{onDiskConfig.indexer.name}'.")
            pixelization = registry.pixelization
            if not isinstance(pixelization, sphgeom.HtmPixelization):
                raise ValueError(f"Registry uses unsupported pixelization class "
                                 f"{pixelization.__class__}.")
            # Report the same value that is compared, so the error path
            # itself cannot fail on a different attribute lookup.
            catalogLevel = onDiskConfig.indexer["HTM"].depth
            registryLevel = pixelization.getLevel()
            if catalogLevel != registryLevel:
                raise ValueError(f"Registry HTM level {registryLevel} "
                                 f"does not match reference catalog level {catalogLevel}.")
            collection = config.collection
            if collection is not None:
                # The documented placeholder is the named '{name}'; the
                # positional argument keeps '{}' / '{0}' templates working.
                collection = collection.format(name, name=name)
            butler = self.getButler(collection)
            if config.filterByRawRegions:
                missing = []
                for index in rawSkyPixels:
                    path = os.path.join(root, name, f"{index}.fits")
                    if os.path.exists(path):
                        butler.ingest(path, datasetType, transfer=config.transfer, skypix=index)
                    else:
                        missing.append(index)
                if missing:
                    self.log.warn("Some overlapping reference catalog shards missing: %s", missing)
            else:
                for path in glob.glob(os.path.join(root, name, "*.fits")):
                    if path.endswith("master_schema.fits"):
                        # The schema file lives beside the shards but is not one.
                        continue
                    _, filename = os.path.split(path)
                    basename, _ = os.path.splitext(filename)
                    try:
                        index = int(basename)
                    except ValueError:
                        self.log.warn("Unrecognized file in reference catalog root: '%s'.", path)
                        continue
                    butler.ingest(path, datasetType, transfer=config.transfer, skypix=index)
def computeRawTracts(self, skymap):
    """Compute and return the tract dimension entries that overlap
    already-ingested visits.

    Parameters
    ----------
    skymap : `str`
        SkyMap dimension name bound to the ``:skymap`` placeholder of the
        query.

    Returns
    -------
    tracts : `list`
        Distinct values of the ``tract`` column of ``visit_tract_join`` for
        the given skymap.
    """
    # NOTE(review): the bind value is passed as a keyword argument matching
    # the ':skymap' placeholder -- confirm against the Registry.query API.
    rows = self.butler.registry.query(
        "SELECT DISTINCT tract FROM visit_tract_join WHERE skymap=:skymap",
        skymap=skymap,
    )
    return [row["tract"] for row in rows]
def bootstrapBrightObjectMasks(self, instrument, root):
    """Ingest bright object masks from a Gen2 data repository.

    This step must be run after `bootstrapRaws` if the
    ``brightObjectMasks.filterByRawRegions`` config option is `True`, and
    must always be run after `bootstrapSkyMaps`.

    Parameters
    ----------
    instrument : `lsst.daf.butler.instrument.Instrument`
        Instrument subclass instance; used to relate Gen2 filter
        strings to Gen3 physical_filters and abstract_filters.
    root : `str`
        Root of the Gen2 repository containing bright object masks.
    """
    self.log.info("Ingesting bright object masks.")
    maskConfig = self.config.brightObjectMasks
    butler = self.getButler(maskConfig.collection)
    baseDataId = {
        "skymap": maskConfig.skymap,
        "instrument": instrument.getName(),
    }
    converter = RepoConverter(root, universe=butler.registry.dimensions, baseDataId=baseDataId,
                              skyMap=self.skyMaps[maskConfig.skymap])
    converter.addDatasetType("brightObjectMask", "ObjectMaskCatalog")
    if maskConfig.filterByRawRegions:
        # Convert one tract directory at a time, restricted to tracts that
        # the Registry associates with already-ingested visits.
        for tract in self.computeRawTracts(maskConfig.skymap):
            with self.butler.transaction():
                converter.convertRepo(butler, directory=f"{root}/deepCoadd/BrightObjectMasks/{tract:d}",
                                      transfer=maskConfig.transfer)
    else:
        with self.butler.transaction():
            converter.convertRepo(butler, transfer=maskConfig.transfer)
def bootstrapCalibrations(self, instrument, root):
    """Ingest master calibrations from a Gen2 calibration data repository.

    At present, all master calibrations in the Gen2 repository are
    transferred, even those unrelated to the ingested raws.

    This step must be run after `bootstrapInstrument`.

    Parameters
    ----------
    instrument : `lsst.daf.butler.instrument.Instrument`
        Instrument subclass instance for the raws and calibrations to be
        included in the initial repo.
    root : `str`
        Root of the Gen2 calibration data repository.
    """
    self.log.info("Ingesting calibrations.")
    calibConfig = self.config.calibrations
    baseDataId = {"instrument": instrument.getName()}
    butler = self.getButler(calibConfig.collection)
    converter = CalibRepoConverter(root, universe=butler.registry.dimensions, baseDataId=baseDataId)
    # Gen2 dataset type name -> Gen3 storage class for each calibration kind.
    calibStorageClasses = (
        ("flat", "MaskedImageF"),
        ("bias", "ImageF"),
        ("dark", "ImageF"),
        ("sky", "ExposureF"),
        ("fringe", "ExposureF"),
    )
    for datasetTypeName, storageClass in calibStorageClasses:
        converter.addDatasetType(datasetTypeName, storageClass)
    with self.butler.transaction():
        # Honour the calibrations section's own transfer mode; this call
        # previously read the bright-object-mask section's setting.
        converter.convertRepo(butler, transfer=calibConfig.transfer)
def computeRawSkyPixels(self)
def __init__(self, config=None, butler, kwds)
def bootstrapCalibrations(self, instrument, root)
def bootstrapRaws(self, files)
def makeTransferChoiceField(doc="How to transfer files (None for no transfer).", default=None)
def getButler(self, collection=None)
def bootstrapRefCats(self, root)
def computeRawTracts(self, skymap)
def bootstrapInstrument(self, instrument)
def bootstrapSkyMaps(self)
def bootstrapBrightObjectMasks(self, instrument, root)