from __future__ import annotations
__all__ = ["ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig"]
import os
import fnmatch

from dataclasses import dataclass
from typing import Iterable, Optional, List, Dict

from lsst.daf.butler import Butler as Butler3, SkyPixDimension
from lsst.pex.config import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
from lsst.pipe.base import Task
from lsst.skymap import skyMapRegistry, BaseSkyMap
from lsst.utils import doImport

from ..ingest import RawIngestTask
from .repoConverter import ConversionSubset
from .rootRepoConverter import RootRepoConverter
from .calibRepoConverter import CalibRepoConverter
from .standardRepoConverter import StandardRepoConverter
48 """Struct containing information about a skymap that may appear in a Gen2 53 """Name of the skymap used in Gen3 data IDs. 57 """Hash computed by `BaseSkyMap.getSha1`. 61 """Name of the skymap used in Gen3 data IDs. 65 """Whether this skymap has been found in at least one repository being 71 """Sub-config used to hold the parameters of a SkyMap. 75 This config only needs to exist because we can't put a 76 `~lsst.pex.config.RegistryField` directly inside a 77 `~lsst.pex.config.ConfigDictField`. 79 It needs to have its only field named "skyMap" for compatibility with the 80 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can 81 use one config file in an obs package to configure both. 83 This name leads to unfortunate repetition with the field named 84 "skymap" that holds it - "skyMap[name].skyMap" - but that seems 87 skyMap = skyMapRegistry.makeField(
88 doc=
"Type and parameters for the SkyMap itself.",
class ConvertRepoConfig(Config):
    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding "
        "visit and exposure dimension entries.",
        target=RawIngestTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={},
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there.",
        dtype=str,
        optional=True,
    )
114 collections = DictField(
115 "Special collections (values) for certain dataset types (keys). " 116 "These are used in addition to rerun collections for datasets in " 117 "reruns. The 'raw' dataset must have an entry here if it is to be " 122 "deepCoadd_skyMap":
"skymaps",
123 "brightObjectMask":
"masks",
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
        },
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    doWriteCuratedCalibrations = Field(
        "If True (default), ingest human-curated calibrations directly via "
        "the Instrument interface. Note that these calibrations are never "
        "converted from Gen2 repositories.",
        dtype=bool,
        default=True,
    )
155 "The names of reference catalogs (subdirectories under ref_cats) to " 160 fileIgnorePatterns = ListField(
161 "Filename globs that should be ignored instead of being treated as " 164 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3",
165 "registry.sqlite3",
"calibRegistry.sqlite3",
"_mapper",
166 "_parent",
"repositoryCfg.yaml"]
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.",
        dtype=str,
        default=["*"],
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[],
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    relatedOnly = Field(
        "If True (default), only convert datasets that are related to the "
        "ingested visits. Ignored unless a list of visits is passed to "
        "run().",
        dtype=bool,
        default=True,
    )

    @property
    def transfer(self):
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    @property
    def instrument(self):
        return self.raws.instrument

    @instrument.setter
    def instrument(self, value):
        self.raws.instrument = value
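# The ``transfer`` and ``instrument`` properties above simply forward to the
# ``raws`` subtask config, so a single override file can configure both this
# task and raw ingest.  A minimal sketch; the instrument class path and
# transfer mode are illustrative assumptions:
#
#     config = ConvertRepoConfig()
#     config.instrument = "lsst.obs.subaru.HyperSuprimeCam"  # sets config.raws.instrument
#     config.transfer = "symlink"                            # sets config.raws.transfer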
216 """A task that converts one or more related Gen2 data repositories to a 217 single Gen3 data repository (with multiple collections). 221 config: `ConvertRepoConfig` 222 Configuration for this task. 223 butler3: `lsst.daf.butler.Butler` 224 Gen3 Butler instance that represents the data repository datasets will 225 be ingested into. The collection and/or run associated with this 226 Butler will be ignored in favor of collections/runs passed via config 229 Other keyword arguments are forwarded to the `Task` constructor. 233 Most of the work of converting repositories is delegated to instances of 234 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds 235 only state that is relevant for all Gen2 repositories being ingested, while 236 each `RepoConverter` instance holds only state relevant for the conversion 237 of a single Gen2 repository. Both the task and the `RepoConverter` 238 instances are single use; `ConvertRepoTask.run` and most `RepoConverter` 239 methods may only be called once on a particular instance. 242 ConfigClass = ConvertRepoConfig
244 _DefaultName =
"convertRepo" 246 def __init__(self, config=None, *, butler3: Butler3, **kwds):
        super().__init__(config, **kwds)
        self.butler3 = butler3
        self.registry = butler3.registry
        self.makeSubtask("raws", butler=butler3)
        self.instrument = doImport(self.config.instrument)()
        # Index the configured skymaps by hash so skymaps found in Gen2
        # repositories can be matched back to their Gen3 names in useSkyMap.
        self._configuredSkyMapsBySha1 = {}
        for name, config in self.config.skyMaps.items():
            instance = config.skyMap.apply()
            struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
            self._configuredSkyMapsBySha1[struct.sha1] = struct
        # SkyPix dimensions reported via useSkyPix, registered later.
        self._usedSkyPix = set()
267 """Return `True` if configuration indicates that the given dataset type 270 This method is intended to be called primarily by the 271 `RepoConverter` instances used interally by the task. 276 Name of the dataset type. 281 Whether the dataset should be included in the conversion. 284 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
285 for pattern
in self.config.datasetIncludePatterns)
286 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
287 for pattern
in self.config.datasetIgnorePatterns)
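    # Illustration of the include/ignore pattern logic; the pattern values
    # here are examples, not configuration defaults:
    #
    #     config.datasetIncludePatterns = ["*"]
    #     config.datasetIgnorePatterns = ["raw", "*_camera"]
    #     task.isDatasetTypeIncluded("deepCoadd")            # True
    #     task.isDatasetTypeIncluded("raw")                  # False, matches "raw"
    #     task.isDatasetTypeIncluded("transmission_camera")  # False, matches "*_camera"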
291 """Indicate that a repository uses the given SkyMap. 293 This method is intended to be called primarily by the 294 `RepoConverter` instances used interally by the task. 298 skyMap : `lsst.skymap.BaseSkyMap` 299 SkyMap instance being used, typically retrieved from a Gen2 305 The name of the skymap in Gen3 data IDs. 307 sha1 = skyMap.getSha1()
310 except KeyError
as err:
311 raise LookupError(f
"SkyMap with sha1={sha1} not included in configuration.")
from err
316 """Register all skymaps that have been marked as used. 318 This method is intended to be called primarily by the 319 `RepoConverter` instances used interally by the task. 323 subset : `ConversionSubset`, optional 324 Object that will be used to filter converted datasets by data ID. 325 If given, it will be updated with the tracts of this skymap that 326 overlap the visits in the subset. 330 struct.instance.register(struct.name, self.
registry)
331 if subset
is not None and self.config.relatedOnly:
332 subset.addSkyMap(self.
registry, struct.name)
335 """Indicate that a repository uses the given SkyPix dimension. 337 This method is intended to be called primarily by the 338 `RepoConverter` instances used interally by the task. 342 dimension : `lsst.daf.butler.SkyPixDimension` 343 Dimension represening a pixelization of the sky. 348 """Register all skymaps that have been marked as used. 350 This method is intended to be called primarily by the 351 `RepoConverter` instances used interally by the task. 355 subset : `ConversionSubset`, optional 356 Object that will be used to filter converted datasets by data ID. 357 If given, it will be updated with the pixelization IDs that 358 overlap the visits in the subset. 360 if subset
is not None and self.config.relatedOnly:
362 subset.addSkyPix(self.
registry, dimension)
    def run(self, root: str, collections: List[str], *,
            calibs: Optional[Dict[str, List[str]]] = None,
            reruns: Optional[Dict[str, List[str]]] = None,
            visits: Optional[Iterable[int]] = None):
368 """Convert a group of related data repositories. 373 Complete path to the root Gen2 data repository. This should be 374 a data repository that includes a Gen2 registry and any raw files 375 and/or reference catalogs. 376 collections : `list` of `str` 377 Gen3 collections that datasets from the root repository should be 378 associated with. This should include any rerun collection that 379 these datasets should also be considered to be part of; because of 380 structural difference between Gen2 parent/child relationships and 381 Gen3 collections, these cannot be reliably inferred. 383 Dictionary mapping calibration repository path to the collections 384 that the repository's datasets should be associated with. The path 385 may be relative to ``root`` or absolute. Collections should 386 include child repository collections as appropriate (see 387 documentation for ``collections``). 389 Dictionary mapping rerun repository path to the collections that 390 the repository's datasets should be associated with. The path may 391 be relative to ``root`` or absolute. Collections should include 392 child repository collections as appropriate (see documentation for 394 visits : iterable of `int`, optional 395 The integer IDs of visits to convert. If not provided, all visits 396 in the Gen2 root repository will be converted. 403 if visits
            # Restrict conversion to data IDs related to the given visits.
            subset = ConversionSubset(instrument=self.instrument.getName(),
                                      visits=frozenset(visits))
        else:
            if self.config.relatedOnly:
                self.log.warn("config.relatedOnly is True but all visits are being ingested; "
                              "no filtering will be done.")
            subset = None
        if self.config.doRegisterInstrument:
            # Add dimension records for the instrument and its filters and
            # detectors.
            self.instrument.register(self.registry)

        # Build converters for the root repo and any calib/rerun repos.
        converters = []
        rootConverter = RootRepoConverter(task=self, root=root, collections=collections,
                                          subset=subset)
        converters.append(rootConverter)
        for root, collections in calibs.items():
            if not os.path.isabs(root):
                root = os.path.join(rootConverter.root, root)
            converter = CalibRepoConverter(task=self, root=root, collections=collections,
                                           mapper=rootConverter.mapper,
                                           subset=rootConverter.subset)
            converters.append(converter)
        for root, collections in reruns.items():
            if not os.path.isabs(root):
                root = os.path.join(rootConverter.root, root)
            converter = StandardRepoConverter(task=self, root=root, collections=collections,
                                              subset=rootConverter.subset)
            converters.append(converter)
        for converter in converters:
            converter.insertDimensionData()

        for converter in converters:
            converter.findDatasets()

        for converter in converters:
            converter.expandDataIds()
        for converter in converters:
            converter.ingest()
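# A minimal end-to-end usage sketch.  Assumptions: a writeable Gen3 repository
# already exists at "/repo/gen3", the instrument class path is valid for the
# data being converted, and the repository paths, collection names, and visit
# IDs below are purely illustrative:
#
#     from lsst.daf.butler import Butler
#
#     butler3 = Butler("/repo/gen3", run="shared/hsc")
#     config = ConvertRepoTask.ConfigClass()
#     config.instrument = "lsst.obs.subaru.HyperSuprimeCam"
#     task = ConvertRepoTask(config=config, butler3=butler3)
#     task.run(
#         root="/datasets/hsc/gen2repo",
#         collections=["shared/hsc"],
#         calibs={"CALIB": ["calib/hsc"]},
#         reruns={"rerun/coadd": ["shared/hsc/rerun/coadd"]},
#         visits=[903334, 903336],
#     )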