21 from __future__
import annotations
23 __all__ = [
"ConvertRepoConfig",
"ConvertRepoTask",
"ConvertRepoSkyMapConfig",
"Rerun"]
27 from dataclasses
import dataclass
28 from typing
import Iterable, Optional, List, Dict
31 from lsst.daf.butler
import (
36 from lsst.pex.config
import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
37 from lsst.pipe.base
import Task
38 from lsst.skymap
import skyMapRegistry, BaseSkyMap
40 from ..ingest
import RawIngestTask
41 from ..defineVisits
import DefineVisitsTask
42 from .repoConverter
import ConversionSubset
43 from .rootRepoConverter
import RootRepoConverter
44 from .calibRepoConverter
import CalibRepoConverter
45 from .standardRepoConverter
import StandardRepoConverter
50 """Struct containing information about a skymap that may appear in a Gen2
55 """Name of the skymap used in Gen3 data IDs.
59 """Hash computed by `BaseSkyMap.getSha1`.
63 """Name of the skymap used in Gen3 data IDs.
67 """Whether this skymap has been found in at least one repository being
74 """Specification for a Gen2 processing-output repository to convert.
78 """Absolute or relative (to the root repository) path to the Gen2
83 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets
84 will be inserted into (`str`).
87 chainName: Optional[str]
88 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will
89 combine this repository's datasets with those of its parent repositories
94 """Collection names associated with parent repositories, used to define the
95 chained collection (`list` [ `str` ]).
97 Ignored if `chainName` is `None`. Runs used in the root repo are
98 automatically included.
103 """Sub-config used to hold the parameters of a SkyMap.
107 This config only needs to exist because we can't put a
108 `~lsst.pex.config.RegistryField` directly inside a
109 `~lsst.pex.config.ConfigDictField`.
111 It needs to have its only field named "skyMap" for compatibility with the
112 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
113 use one config file in an obs package to configure both.
115 This name leads to unfortunate repetition with the field named
116 "skyMaps" that holds it - "skyMaps[name].skyMap" - but that seems
119 skyMap = skyMapRegistry.makeField(
120 doc=
"Type and parameters for the SkyMap itself.",
126 raws = ConfigurableField(
127 "Configuration for subtask responsible for ingesting raws and adding "
128 "exposure dimension entries.",
129 target=RawIngestTask,
131 defineVisits = ConfigurableField(
132 "Configuration for the subtask responsible for defining visits from "
134 target=DefineVisitsTask,
136 skyMaps = ConfigDictField(
137 "Mapping from Gen3 skymap name to the parameters used to construct a "
138 "BaseSkyMap instance. This will be used to associate names with "
139 "existing skymaps found in the Gen2 repo.",
141 itemtype=ConvertRepoSkyMapConfig,
144 rootSkyMapName = Field(
145 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
146 "datasets in the root repository when no SkyMap is found there. ",
152 "A mapping from dataset type name to the RUN collection they should "
153 "be inserted into. This must include all datasets that can be found "
154 "in the root repository; other repositories will use per-repository "
159 "deepCoadd_skyMap":
"skymaps",
160 "brightObjectMask":
"masks",
163 storageClasses = DictField(
164 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
165 "or 'persistable') to the Gen3 StorageClass name.",
172 "defects":
"Defects",
173 "BaseSkyMap":
"SkyMap",
174 "BaseCatalog":
"Catalog",
175 "BackgroundList":
"Background",
177 "MultilevelParquetTable":
"DataFrame",
178 "ParquetTable":
"DataFrame",
182 formatterClasses = DictField(
183 "Mapping from dataset type name to formatter class. "
184 "By default these are derived from the formatters listed in the"
185 " Gen3 datastore configuration.",
190 targetHandlerClasses = DictField(
191 "Mapping from dataset type name to target handler class.",
196 doRegisterInstrument = Field(
197 "If True (default), add dimension records for the Instrument and its "
198 "filters and detectors to the registry instead of assuming they are "
203 doWriteCuratedCalibrations = Field(
204 "If True (default), ingest human-curated calibrations directly via "
205 "the Instrument interface. Note that these calibrations are never "
206 "converted from Gen2 repositories.",
211 "The names of reference catalogs (subdirectories under ref_cats) to "
216 fileIgnorePatterns = ListField(
217 "Filename globs that should be ignored instead of being treated as "
220 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3",
221 "registry.sqlite3",
"calibRegistry.sqlite3",
"_mapper",
222 "_parent",
"repositoryCfg.yaml"]
224 rawDatasetType = Field(
225 "Gen2 dataset type to use for raw data.",
229 datasetIncludePatterns = ListField(
230 "Glob-style patterns for dataset type names that should be converted.",
234 datasetIgnorePatterns = ListField(
235 "Glob-style patterns for dataset type names that should not be "
236 "converted despite matching a pattern in datasetIncludePatterns.",
241 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
246 "If True (default), only convert datasets that are related to the "
247 "ingested visits. Ignored unless a list of visits is passed to "
252 curatedCalibrations = ListField(
253 "Dataset types that are handled by `Instrument.writeCuratedCalibrations()` "
254 "and thus should not be converted using the standard calibration "
255 "conversion system.",
258 "transmission_sensor",
259 "transmission_filter",
260 "transmission_optics",
261 "transmission_atmosphere",
265 doc=(
"Fully-qualified Python name of the `Instrument` subclass for "
266 "all converted datasets."),
274 return self.
raws.transfer
278 self.
raws.transfer = value
288 """A task that converts one or more related Gen2 data repositories to a
289 single Gen3 data repository (with multiple collections).
293 config: `ConvertRepoConfig`
294 Configuration for this task.
295 butler3: `lsst.daf.butler.Butler`
296 A writeable Gen3 Butler instance that represents the data repository
297 that datasets will be ingested into. If the 'raw' dataset is
298 configured to be included in the conversion, ``butler3.run`` should be
299 set to the name of the collection raws should be ingested into, and
300 ``butler3.collections`` should include a calibration collection from
301 which the ``camera`` dataset can be loaded, unless a calibration repo
302 is converted and ``doWriteCuratedCalibrations`` is `True`.
304 Other keyword arguments are forwarded to the `Task` constructor.
308 Most of the work of converting repositories is delegated to instances of
309 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds
310 only state that is relevant for all Gen2 repositories being ingested, while
311 each `RepoConverter` instance holds only state relevant for the conversion
312 of a single Gen2 repository. Both the task and the `RepoConverter`
313 instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
314 methods may only be called once on a particular instance.
317 ConfigClass = ConvertRepoConfig
319 _DefaultName =
"convertRepo"
321 def __init__(self, config=None, *, butler3: Butler3, **kwargs):
328 self.makeSubtask(
"raws", butler=butler3)
329 self.makeSubtask(
"defineVisits", butler=butler3)
336 for name, config
in self.config.skyMaps.items():
337 instance = config.skyMap.apply()
342 def _populateSkyMapDicts(self, name, instance):
343 struct =
ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
348 """Return `True` if configuration indicates that the given dataset type
351 This method is intended to be called primarily by the
352 `RepoConverter` instances used internally by the task.
357 Name of the dataset type.
362 Whether the dataset should be included in the conversion.
365 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
366 for pattern
in self.config.datasetIncludePatterns)
367 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
368 for pattern
in self.config.datasetIgnorePatterns)
371 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
372 """Indicate that a repository uses the given SkyMap.
374 This method is intended to be called primarily by the
375 `RepoConverter` instances used internally by the task.
379 skyMap : `lsst.skymap.BaseSkyMap`
380 SkyMap instance being used, typically retrieved from a Gen2
383 The name of the gen2 skymap, for error reporting.
388 The name of the skymap in Gen3 data IDs.
393 Raised if the specified skymap cannot be found.
395 sha1 = skyMap.getSha1()
400 except KeyError
as err:
401 msg = f
"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
402 raise LookupError(msg)
from err
407 """Register all skymaps that have been marked as used.
409 This method is intended to be called primarily by the
410 `RepoConverter` instances used internally by the task.
414 subset : `ConversionSubset`, optional
415 Object that will be used to filter converted datasets by data ID.
416 If given, it will be updated with the tracts of this skymap that
417 overlap the visits in the subset.
421 struct.instance.register(struct.name, self.
registry)
422 if subset
is not None and self.config.relatedOnly:
423 subset.addSkyMap(self.
registry, struct.name)
426 """Indicate that a repository uses the given SkyPix dimension.
428 This method is intended to be called primarily by the
429 `RepoConverter` instances used internally by the task.
433 dimension : `lsst.daf.butler.SkyPixDimension`
434 Dimension representing a pixelization of the sky.
439 """Register all SkyPix dimensions that have been marked as used.
441 This method is intended to be called primarily by the
442 `RepoConverter` instances used internally by the task.
446 subset : `ConversionSubset`, optional
447 Object that will be used to filter converted datasets by data ID.
448 If given, it will be updated with the pixelization IDs that
449 overlap the visits in the subset.
451 if subset
is not None and self.config.relatedOnly:
453 subset.addSkyPix(self.
registry, dimension)
455 def run(self, root: str, *,
456 calibs: Dict[str, str] =
None,
458 visits: Optional[Iterable[int]] =
None):
459 """Convert a group of related data repositories.
464 Complete path to the root Gen2 data repository. This should be
465 a data repository that includes a Gen2 registry and any raw files
466 and/or reference catalogs.
468 Dictionary mapping calibration repository path to the
469 `~lsst.daf.butler.CollectionType.RUN` collection that converted
470 datasets within it should be inserted into.
471 reruns : `list` of `Rerun`
472 Specifications for rerun (processing output) collections to
474 visits : iterable of `int`, optional
475 The integer IDs of visits to convert. If not provided, all visits
476 in the Gen2 root repository will be converted.
480 if visits
is not None:
483 if self.config.relatedOnly:
484 self.log.warn(
"config.relatedOnly is True but all visits are being ingested; "
485 "no filtering will be done.")
491 converters.append(rootConverter)
492 for calibRoot, run
in calibs.items():
493 if not os.path.isabs(calibRoot):
494 calibRoot = os.path.join(rootConverter.root, calibRoot)
496 mapper=rootConverter.mapper,
497 subset=rootConverter.subset)
498 converters.append(converter)
501 if not os.path.isabs(runRoot):
502 runRoot = os.path.join(rootConverter.root, runRoot)
504 subset=rootConverter.subset)
505 converters.append(converter)
508 if self.config.doRegisterInstrument:
518 rootConverter.runRawIngest()
525 if self.config.doWriteCuratedCalibrations:
526 for run
in calibs.values():
527 butler3 = Butler3(butler=self.
butler3, run=run)
528 self.
instrument.writeCuratedCalibrations(butler3)
532 rootConverter.runDefineVisits()
535 for converter
in converters:
548 for converter
in converters:
549 converter.insertDimensionData()
562 for converter
in converters:
563 converter.findDatasets()
566 for converter
in converters:
567 converter.expandDataIds()
570 for converter
in converters:
575 if spec.chainName
is not None:
576 self.
butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
577 chain = [spec.runName]
578 chain.extend(spec.parents)
579 chain.extend(rootConverter.getCollectionChain())
580 self.log.info(
"Defining %s from chain %s.", spec.chainName, chain)
581 self.
butler3.registry.setCollectionChain(spec.chainName, chain)