21 from __future__
import annotations
23 __all__ = [
"ConvertRepoConfig",
"ConvertRepoTask",
"ConvertRepoSkyMapConfig"]
27 from dataclasses
import dataclass
28 from typing
import Iterable, Optional, List, Dict
31 from lsst.daf.butler
import (
35 from lsst.pex.config
import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
36 from lsst.pipe.base
import Task
37 from lsst.skymap
import skyMapRegistry, BaseSkyMap
39 from ..ingest
import RawIngestTask
40 from .repoConverter
import ConversionSubset
41 from .rootRepoConverter
import RootRepoConverter
42 from .calibRepoConverter
import CalibRepoConverter
43 from .standardRepoConverter
import StandardRepoConverter
48 """Struct containing information about a skymap that may appear in a Gen2 53 """Name of the skymap used in Gen3 data IDs. 57 """Hash computed by `BaseSkyMap.getSha1`. 61 """Name of the skymap used in Gen3 data IDs. 65 """Whether this skymap has been found in at least one repository being 71 """Sub-config used to hold the parameters of a SkyMap. 75 This config only needs to exist because we can't put a 76 `~lsst.pex.config.RegistryField` directly inside a 77 `~lsst.pex.config.ConfigDictField`. 79 It needs to have its only field named "skyMap" for compatibility with the 80 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can 81 use one config file in an obs package to configure both. 83 This name leads to unfortunate repetition with the field named 84 "skymap" that holds it - "skyMap[name].skyMap" - but that seems 87 skyMap = skyMapRegistry.makeField(
88 doc=
"Type and parameters for the SkyMap itself.",
94 raws = ConfigurableField(
95 "Configuration for subtask responsible for ingesting raws and adding " 96 "visit and exposure dimension entries.",
99 skyMaps = ConfigDictField(
100 "Mapping from Gen3 skymap name to the parameters used to construct a " 101 "BaseSkyMap instance. This will be used to associate names with " 102 "existing skymaps found in the Gen2 repo.",
104 itemtype=ConvertRepoSkyMapConfig,
107 rootSkyMapName = Field(
108 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for " 109 "datasets in the root repository when no SkyMap is found there. ",
114 collections = DictField(
115 "Special collections (values) for certain dataset types (keys). " 116 "These are used in addition to rerun collections for datasets in " 117 "reruns. The 'raw' dataset must have an entry here if it is to be " 122 "deepCoadd_skyMap":
"skymaps",
123 "brightObjectMask":
"masks",
126 storageClasses = DictField(
127 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' " 128 "or 'persistable') to the Gen3 StorageClass name.",
135 "defects":
"Defects",
136 "BaseSkyMap":
"SkyMap",
137 "BaseCatalog":
"Catalog",
138 "BackgroundList":
"Background",
140 "MultilevelParquetTable":
"DataFrame",
141 "ParquetTable":
"DataFrame",
145 formatterClasses = DictField(
146 "Mapping from dataset type name to formatter class. " 147 "By default these are derived from the formatters listed in the" 148 " Gen3 datastore configuration.",
153 targetHandlerClasses = DictField(
154 "Mapping from dataset type name to target handler class.",
159 doRegisterInstrument = Field(
160 "If True (default), add dimension records for the Instrument and its " 161 "filters and detectors to the registry instead of assuming they are " 166 doWriteCuratedCalibrations = Field(
167 "If True (default), ingest human-curated calibrations directly via " 168 "the Instrument interface. Note that these calibrations are never " 169 "converted from Gen2 repositories.",
174 "The names of reference catalogs (subdirectories under ref_cats) to " 179 fileIgnorePatterns = ListField(
180 "Filename globs that should be ignored instead of being treated as " 183 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3",
184 "registry.sqlite3",
"calibRegistry.sqlite3",
"_mapper",
185 "_parent",
"repositoryCfg.yaml"]
187 rawDatasetType = Field(
188 "Gen2 dataset type to use for raw data.",
192 datasetIncludePatterns = ListField(
193 "Glob-style patterns for dataset type names that should be converted.",
197 datasetIgnorePatterns = ListField(
198 "Glob-style patterns for dataset type names that should not be " 199 "converted despite matching a pattern in datasetIncludePatterns.",
204 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
209 "If True (default), only convert datasets that are related to the " 210 "ingested visits. Ignored unless a list of visits is passed to " 215 curatedCalibrations = ListField(
216 "Dataset types that are handled by `Instrument.writeCuratedCalibrations()` " 217 "and thus should not be converted using the standard calibration " 218 "conversion system.",
221 "transmission_sensor",
222 "transmission_filter",
223 "transmission_optics",
224 "transmission_atmosphere",
230 return self.
raws.transfer
234 self.
raws.transfer = value
238 return self.
raws.instrument
242 self.
raws.instrument = value
252 """A task that converts one or more related Gen2 data repositories to a 253 single Gen3 data repository (with multiple collections). 257 config: `ConvertRepoConfig` 258 Configuration for this task. 259 butler3: `lsst.daf.butler.Butler` 260 Gen3 Butler instance that represents the data repository datasets will 261 be ingested into. The collection and/or run associated with this 262 Butler will be ignored in favor of collections/runs passed via config 265 Other keyword arguments are forwarded to the `Task` constructor. 269 Most of the work of converting repositories is delegated to instances of 270 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds 271 only state that is relevant for all Gen2 repositories being ingested, while 272 each `RepoConverter` instance holds only state relevant for the conversion 273 of a single Gen2 repository. Both the task and the `RepoConverter` 274 instances are single use; `ConvertRepoTask.run` and most `RepoConverter` 275 methods may only be called once on a particular instance. 278 ConfigClass = ConvertRepoConfig
280 _DefaultName =
"convertRepo" 282 def __init__(self, config=None, *, butler3: Butler3, **kwds):
289 self.makeSubtask(
"raws", butler=butler3)
293 self.
instrument = doImport(self.config.instrument)()
296 for name, config
in self.config.skyMaps.items():
297 instance = config.skyMap.apply()
301 def _populateSkyMapDicts(self, name, instance):
302 struct =
ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
307 """Return `True` if configuration indicates that the given dataset type 310 This method is intended to be called primarily by the 311 `RepoConverter` instances used internally by the task. 316 Name of the dataset type. 321 Whether the dataset should be included in the conversion. 324 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
325 for pattern
in self.config.datasetIncludePatterns)
326 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
327 for pattern
in self.config.datasetIgnorePatterns)
330 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
331 """Indicate that a repository uses the given SkyMap. 333 This method is intended to be called primarily by the 334 `RepoConverter` instances used internally by the task. 338 skyMap : `lsst.skymap.BaseSkyMap` 339 SkyMap instance being used, typically retrieved from a Gen2 342 The name of the Gen2 skymap, for error reporting. 347 The name of the skymap in Gen3 data IDs. 352 Raised if the specified skymap cannot be found. 354 sha1 = skyMap.getSha1()
359 except KeyError
as err:
360 msg = f
"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration." 361 raise LookupError(msg)
from err
366 """Register all skymaps that have been marked as used. 368 This method is intended to be called primarily by the 369 `RepoConverter` instances used internally by the task. 373 subset : `ConversionSubset`, optional 374 Object that will be used to filter converted datasets by data ID. 375 If given, it will be updated with the tracts of this skymap that 376 overlap the visits in the subset. 380 struct.instance.register(struct.name, self.
registry)
381 if subset
is not None and self.config.relatedOnly:
382 subset.addSkyMap(self.
registry, struct.name)
385 """Indicate that a repository uses the given SkyPix dimension. 387 This method is intended to be called primarily by the 388 `RepoConverter` instances used internally by the task. 392 dimension : `lsst.daf.butler.SkyPixDimension` 393 Dimension representing a pixelization of the sky. 398 """Register all skymaps that have been marked as used. 400 This method is intended to be called primarily by the 401 `RepoConverter` instances used internally by the task. 405 subset : `ConversionSubset`, optional 406 Object that will be used to filter converted datasets by data ID. 407 If given, it will be updated with the pixelization IDs that 408 overlap the visits in the subset. 410 if subset
is not None and self.config.relatedOnly:
412 subset.addSkyPix(self.
registry, dimension)
414 def run(self, root: str, collections: List[str], *,
415 calibs: Dict[str, List[str]] =
None,
416 reruns: Dict[str, List[str]] =
None,
417 visits: Optional[Iterable[int]] =
None):
418 """Convert a group of related data repositories. 423 Complete path to the root Gen2 data repository. This should be 424 a data repository that includes a Gen2 registry and any raw files 425 and/or reference catalogs. 426 collections : `list` of `str` 427 Gen3 collections that datasets from the root repository should be 428 associated with. This should include any rerun collection that 429 these datasets should also be considered to be part of; because of 430 structural difference between Gen2 parent/child relationships and 431 Gen3 collections, these cannot be reliably inferred. 433 Dictionary mapping calibration repository path to the collections 434 that the repository's datasets should be associated with. The path 435 may be relative to ``root`` or absolute. Collections should 436 include child repository collections as appropriate (see 437 documentation for ``collections``). 439 Dictionary mapping rerun repository path to the collections that 440 the repository's datasets should be associated with. The path may 441 be relative to ``root`` or absolute. Collections should include 442 child repository collections as appropriate (see documentation for 444 visits : iterable of `int`, optional 445 The integer IDs of visits to convert. If not provided, all visits 446 in the Gen2 root repository will be converted. 453 if visits
is not None:
456 if self.config.relatedOnly:
457 self.log.warn(
"config.relatedOnly is True but all visits are being ingested; " 458 "no filtering will be done.")
466 if self.config.doRegisterInstrument:
479 rootConverter =
RootRepoConverter(task=self, root=root, collections=collections, subset=subset)
481 converters.append(rootConverter)
483 for root, collections
in calibs.items():
484 if not os.path.isabs(root):
485 root = os.path.join(rootConverter.root, root)
487 mapper=rootConverter.mapper,
488 subset=rootConverter.subset)
490 converters.append(converter)
492 for root, collections
in reruns.items():
493 if not os.path.isabs(root):
494 root = os.path.join(rootConverter.root, root)
496 subset=rootConverter.subset)
498 converters.append(converter)
515 for converter
in converters:
517 converter.insertDimensionData()
532 for converter
in converters:
533 converter.findDatasets()
536 for converter
in converters:
537 converter.expandDataIds()
540 for converter
in converters:
def isDatasetTypeIncluded
def _populateSkyMapDicts(self, name, instance)