21 from __future__
import annotations
23 __all__ = [
"ConvertRepoConfig",
"ConvertRepoTask",
"ConvertRepoSkyMapConfig",
"Rerun"]
27 from dataclasses
import dataclass
28 from typing
import Iterable, Optional, List, Dict
31 from lsst.daf.butler
import (
36 from lsst.pex.config
import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
37 from lsst.pipe.base
import Task
38 from lsst.skymap
import skyMapRegistry, BaseSkyMap
40 from ..ingest
import RawIngestTask
41 from .repoConverter
import ConversionSubset
42 from .rootRepoConverter
import RootRepoConverter
43 from .calibRepoConverter
import CalibRepoConverter
44 from .standardRepoConverter
import StandardRepoConverter
49 """Struct containing information about a skymap that may appear in a Gen2 54 """Name of the skymap used in Gen3 data IDs. 58 """Hash computed by `BaseSkyMap.getSha1`. 62 """Name of the skymap used in Gen3 data IDs. 66 """Whether this skymap has been found in at least one repository being 73 """Specification for a Gen2 processing-output repository to convert. 77 """Absolute or relative (to the root repository) path to the Gen2 82 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets 83 will be inserted into (`str`). 86 chainName: Optional[str]
87 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will 88 combine this repository's datasets with those of its parent repositories 93 """Collection names associated with parent repositories, used to define the 94 chained collection (`list` [ `str` ]). 96 Ignored if `chainName` is `None`. Runs used in the root repo are 97 automatically included. 102 """Sub-config used to hold the parameters of a SkyMap. 106 This config only needs to exist because we can't put a 107 `~lsst.pex.config.RegistryField` directly inside a 108 `~lsst.pex.config.ConfigDictField`. 110 It needs to have its only field named "skyMap" for compatibility with the 111 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can 112 use one config file in an obs package to configure both. 114 This name leads to unfortunate repetition with the field named 115 "skymap" that holds it - "skyMap[name].skyMap" - but that seems 118 skyMap = skyMapRegistry.makeField(
119 doc=
"Type and parameters for the SkyMap itself.",
125 raws = ConfigurableField(
126 "Configuration for subtask responsible for ingesting raws and adding " 127 "visit and exposure dimension entries.",
128 target=RawIngestTask,
130 skyMaps = ConfigDictField(
131 "Mapping from Gen3 skymap name to the parameters used to construct a " 132 "BaseSkyMap instance. This will be used to associate names with " 133 "existing skymaps found in the Gen2 repo.",
135 itemtype=ConvertRepoSkyMapConfig,
138 rootSkyMapName = Field(
139 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for " 140 "datasets in the root repository when no SkyMap is found there. ",
146 "A mapping from dataset type name to the RUN collection they should " 147 "be inserted into. This must include all datasets that can be found " 148 "in the root repository; other repositories will use per-repository " 153 "deepCoadd_skyMap":
"skymaps",
154 "brightObjectMask":
"masks",
157 storageClasses = DictField(
158 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' " 159 "or 'persistable') to the Gen3 StorageClass name.",
166 "defects":
"Defects",
167 "BaseSkyMap":
"SkyMap",
168 "BaseCatalog":
"Catalog",
169 "BackgroundList":
"Background",
171 "MultilevelParquetTable":
"DataFrame",
172 "ParquetTable":
"DataFrame",
176 formatterClasses = DictField(
177 "Mapping from dataset type name to formatter class. " 178 "By default these are derived from the formatters listed in the" 179 " Gen3 datastore configuration.",
184 targetHandlerClasses = DictField(
185 "Mapping from dataset type name to target handler class.",
190 doRegisterInstrument = Field(
191 "If True (default), add dimension records for the Instrument and its " 192 "filters and detectors to the registry instead of assuming they are " 197 doWriteCuratedCalibrations = Field(
198 "If True (default), ingest human-curated calibrations directly via " 199 "the Instrument interface. Note that these calibrations are never " 200 "converted from Gen2 repositories.",
205 "The names of reference catalogs (subdirectories under ref_cats) to " 210 fileIgnorePatterns = ListField(
211 "Filename globs that should be ignored instead of being treated as " 214 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3",
215 "registry.sqlite3",
"calibRegistry.sqlite3",
"_mapper",
216 "_parent",
"repositoryCfg.yaml"]
218 rawDatasetType = Field(
219 "Gen2 dataset type to use for raw data.",
223 datasetIncludePatterns = ListField(
224 "Glob-style patterns for dataset type names that should be converted.",
228 datasetIgnorePatterns = ListField(
229 "Glob-style patterns for dataset type names that should not be " 230 "converted despite matching a pattern in datasetIncludePatterns.",
235 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
240 "If True (default), only convert datasets that are related to the " 241 "ingested visits. Ignored unless a list of visits is passed to " 246 curatedCalibrations = ListField(
247 "Dataset types that are handled by `Instrument.writeCuratedCalibrations()` " 248 "and thus should not be converted using the standard calibration " 249 "conversion system.",
252 "transmission_sensor",
253 "transmission_filter",
254 "transmission_optics",
255 "transmission_atmosphere",
261 return self.
raws.transfer
265 self.
raws.transfer = value
269 return self.
raws.instrument
273 self.
raws.instrument = value
283 """A task that converts one or more related Gen2 data repositories to a 284 single Gen3 data repository (with multiple collections). 288 config : `ConvertRepoConfig` 289 Configuration for this task. 290 butler3 : `lsst.daf.butler.Butler` 291 Gen3 Butler instance that represents the data repository that datasets will 292 be ingested into. The collection and/or run associated with this 293 Butler will be ignored in favor of collections/runs passed via config 296 Other keyword arguments are forwarded to the `Task` constructor. 300 Most of the work of converting repositories is delegated to instances of 301 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds 302 only state that is relevant for all Gen2 repositories being ingested, while 303 each `RepoConverter` instance holds only state relevant for the conversion 304 of a single Gen2 repository. Both the task and the `RepoConverter` 305 instances are single use; `ConvertRepoTask.run` and most `RepoConverter` 306 methods may only be called once on a particular instance. 309 ConfigClass = ConvertRepoConfig
311 _DefaultName =
"convertRepo" 313 def __init__(self, config=None, *, butler3: Butler3, **kwds):
320 self.makeSubtask(
"raws", butler=butler3)
324 self.
instrument = doImport(self.config.instrument)()
327 for name, config
in self.config.skyMaps.items():
328 instance = config.skyMap.apply()
332 def _populateSkyMapDicts(self, name, instance):
333 struct =
ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
338 """Return `True` if configuration indicates that the given dataset type 341 This method is intended to be called primarily by the 342 `RepoConverter` instances used internally by the task. 347 Name of the dataset type. 352 Whether the dataset should be included in the conversion. 355 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
356 for pattern
in self.config.datasetIncludePatterns)
357 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
358 for pattern
in self.config.datasetIgnorePatterns)
361 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
362 """Indicate that a repository uses the given SkyMap. 364 This method is intended to be called primarily by the 365 `RepoConverter` instances used internally by the task. 369 skyMap : `lsst.skymap.BaseSkyMap` 370 SkyMap instance being used, typically retrieved from a Gen2 373 The name of the Gen2 skymap, for error reporting. 378 The name of the skymap in Gen3 data IDs. 383 Raised if the specified skymap cannot be found. 385 sha1 = skyMap.getSha1()
390 except KeyError
as err:
391 msg = f
"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration." 392 raise LookupError(msg)
from err
397 """Register all skymaps that have been marked as used. 399 This method is intended to be called primarily by the 400 `RepoConverter` instances used internally by the task. 404 subset : `ConversionSubset`, optional 405 Object that will be used to filter converted datasets by data ID. 406 If given, it will be updated with the tracts of this skymap that 407 overlap the visits in the subset. 411 struct.instance.register(struct.name, self.
registry)
412 if subset
is not None and self.config.relatedOnly:
413 subset.addSkyMap(self.
registry, struct.name)
416 """Indicate that a repository uses the given SkyPix dimension. 418 This method is intended to be called primarily by the 419 `RepoConverter` instances used internally by the task. 423 dimension : `lsst.daf.butler.SkyPixDimension` 424 Dimension representing a pixelization of the sky. 429 """Register all skypix dimensions that have been marked as used. 431 This method is intended to be called primarily by the 432 `RepoConverter` instances used internally by the task. 436 subset : `ConversionSubset`, optional 437 Object that will be used to filter converted datasets by data ID. 438 If given, it will be updated with the pixelization IDs that 439 overlap the visits in the subset. 441 if subset
is not None and self.config.relatedOnly:
443 subset.addSkyPix(self.
registry, dimension)
445 def run(self, root: str, *,
446 calibs: Dict[str, str] =
None,
448 visits: Optional[Iterable[int]] =
None):
449 """Convert a group of related data repositories. 454 Complete path to the root Gen2 data repository. This should be 455 a data repository that includes a Gen2 registry and any raw files 456 and/or reference catalogs. 458 Dictionary mapping calibration repository path to the 459 `~lsst.daf.butler.CollectionType.RUN` collection that converted 460 datasets within it should be inserted into. 461 reruns : `list` of `Rerun` 462 Specifications for rerun (processing output) collections to 464 visits : iterable of `int`, optional 465 The integer IDs of visits to convert. If not provided, all visits 466 in the Gen2 root repository will be converted. 470 if visits
is not None:
473 if self.config.relatedOnly:
474 self.log.warn(
"config.relatedOnly is True but all visits are being ingested; " 475 "no filtering will be done.")
483 if self.config.doRegisterInstrument:
498 converters.append(rootConverter)
500 for calibRoot, run
in calibs.items():
501 if not os.path.isabs(calibRoot):
502 calibRoot = os.path.join(rootConverter.root, calibRoot)
504 mapper=rootConverter.mapper,
505 subset=rootConverter.subset)
507 converters.append(converter)
511 if not os.path.isabs(runRoot):
512 runRoot = os.path.join(rootConverter.root, runRoot)
514 subset=rootConverter.subset)
516 converters.append(converter)
533 for converter
in converters:
535 converter.insertDimensionData()
550 for converter
in converters:
551 converter.findDatasets()
554 for converter
in converters:
555 converter.expandDataIds()
558 for converter
in converters:
563 if spec.chainName
is not None:
564 self.
butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
565 chain = [spec.runName]
566 chain.extend(spec.parents)
567 chain.extend(rootConverter.getCollectionChain())
568 self.log.info(
"Defining %s from chain %s.", spec.chainName, chain)
569 self.
butler3.registry.setCollectionChain(spec.chainName, chain)
def isDatasetTypeIncluded
def _populateSkyMapDicts(self, name, instance)