21 from __future__
import annotations
23 __all__ = [
"ConvertRepoConfig",
"ConvertRepoTask",
"ConvertRepoSkyMapConfig",
"Rerun"]
27 from dataclasses
import dataclass
28 from multiprocessing
import Pool
29 from typing
import Iterable, Optional, List, Dict
31 from lsst.daf.butler
import (
36 from lsst.pex.config
import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
37 from lsst.pipe.base
import Task
38 from lsst.skymap
import skyMapRegistry, BaseSkyMap
40 from ..ingest
import RawIngestTask
41 from ..defineVisits
import DefineVisitsTask
42 from .repoConverter
import ConversionSubset
43 from .rootRepoConverter
import RootRepoConverter
44 from .calibRepoConverter
import CalibRepoConverter
45 from .standardRepoConverter
import StandardRepoConverter
46 from .._instrument
import Instrument
51 """Struct containing information about a skymap that may appear in a Gen2
56 """Name of the skymap used in Gen3 data IDs.
60 """Hash computed by `BaseSkyMap.getSha1`.
64 """Name of the skymap used in Gen3 data IDs.
68 """Whether this skymap has been found in at least one repository being
75 """Specification for a Gen2 processing-output repository to convert.
79 """Absolute or relative (to the root repository) path to the Gen2
84 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets
85 will be inserted into (`str`).
88 chainName: Optional[str]
89 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will
90 combine this repository's datasets with those of its parent repositories
95 """Collection names associated with parent repositories, used to define the
96 chained collection (`list` [ `str` ]).
98 Ignored if `chainName` is `None`. Runs used in the root repo are
99 automatically included.
104 """Sub-config used to hold the parameters of a SkyMap.
108 This config only needs to exist because we can't put a
109 `~lsst.pex.config.RegistryField` directly inside a
110 `~lsst.pex.config.ConfigDictField`.
112 It needs to have its only field named "skyMap" for compatibility with the
113 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
114 use one config file in an obs package to configure both.
116 This name leads to unfortunate repetition with the field named
117 "skymap" that holds it - "skyMap[name].skyMap" - but that seems
120 skyMap = skyMapRegistry.makeField(
121 doc=
"Type and parameters for the SkyMap itself.",
127 raws = ConfigurableField(
128 "Configuration for subtask responsible for ingesting raws and adding "
129 "exposure dimension entries.",
130 target=RawIngestTask,
132 defineVisits = ConfigurableField(
133 "Configuration for the subtask responsible for defining visits from "
135 target=DefineVisitsTask,
137 skyMaps = ConfigDictField(
138 "Mapping from Gen3 skymap name to the parameters used to construct a "
139 "BaseSkyMap instance. This will be used to associate names with "
140 "existing skymaps found in the Gen2 repo.",
142 itemtype=ConvertRepoSkyMapConfig,
145 rootSkyMapName = Field(
146 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
147 "datasets in the root repository when no SkyMap is found there. ",
153 "A mapping from dataset type name to the RUN collection they should "
154 "be inserted into. This must include all datasets that can be found "
155 "in the root repository; other repositories will use per-repository "
160 "deepCoadd_skyMap":
"skymaps",
161 "brightObjectMask":
"masks",
164 storageClasses = DictField(
165 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
166 "or 'persistable') to the Gen3 StorageClass name.",
173 "defects":
"Defects",
174 "crosstalk":
"CrosstalkCalib",
175 "BaseSkyMap":
"SkyMap",
176 "BaseCatalog":
"Catalog",
177 "BackgroundList":
"Background",
179 "MultilevelParquetTable":
"DataFrame",
180 "ParquetTable":
"DataFrame",
184 formatterClasses = DictField(
185 "Mapping from dataset type name to formatter class. "
186 "By default these are derived from the formatters listed in the"
187 " Gen3 datastore configuration.",
192 targetHandlerClasses = DictField(
193 "Mapping from dataset type name to target handler class.",
198 doRegisterInstrument = Field(
199 "If True (default), add dimension records for the Instrument and its "
200 "filters and detectors to the registry instead of assuming they are "
205 doWriteCuratedCalibrations = Field(
206 "If True (default), ingest human-curated calibrations directly via "
207 "the Instrument interface. Note that these calibrations are never "
208 "converted from Gen2 repositories.",
213 "The names of reference catalogs (subdirectories under ref_cats) to "
218 fileIgnorePatterns = ListField(
219 "Filename globs that should be ignored instead of being treated as "
222 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3",
223 "registry.sqlite3",
"calibRegistry.sqlite3",
"_mapper",
224 "_parent",
"repositoryCfg.yaml"]
226 rawDatasetType = Field(
227 "Gen2 dataset type to use for raw data.",
231 datasetIncludePatterns = ListField(
232 "Glob-style patterns for dataset type names that should be converted.",
236 datasetIgnorePatterns = ListField(
237 "Glob-style patterns for dataset type names that should not be "
238 "converted despite matching a pattern in datasetIncludePatterns.",
243 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
248 "If True (default), only convert datasets that are related to the "
249 "ingested visits. Ignored unless a list of visits is passed to "
257 return self.
raws.transfer
261 self.
raws.transfer = value
271 """A task that converts one or more related Gen2 data repositories to a
272 single Gen3 data repository (with multiple collections).
276 config: `ConvertRepoConfig`
277 Configuration for this task.
278 butler3: `lsst.daf.butler.Butler`
279 A writeable Gen3 Butler instance that represents the data repository
280 that datasets will be ingested into. If the 'raw' dataset is
281 configured to be included in the conversion, ``butler3.run`` should be
282 set to the name of the collection raws should be ingested into, and
283 ``butler3.collections`` should include a calibration collection from
284 which the ``camera`` dataset can be loaded, unless a calibration repo
285 is converted and ``doWriteCuratedCalibrations`` is `True`.
286 instrument : `lsst.obs.base.Instrument`
287 The Gen3 instrument that should be used for this conversion.
289 Other keyword arguments are forwarded to the `Task` constructor.
293 Most of the work of converting repositories is delegated to instances of
294 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds
295 only state that is relevant for all Gen2 repositories being ingested, while
296 each `RepoConverter` instance holds only state relevant for the conversion
297 of a single Gen2 repository. Both the task and the `RepoConverter`
298 instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
299 methods may only be called once on a particular instance.
302 ConfigClass = ConvertRepoConfig
304 _DefaultName =
"convertRepo"
306 def __init__(self, config=None, *, butler3: Butler3, instrument: Instrument, **kwargs):
313 self.makeSubtask(
"raws", butler=butler3)
314 self.makeSubtask(
"defineVisits", butler=butler3)
321 for name, config
in self.config.skyMaps.items():
322 instance = config.skyMap.apply()
328 def _reduce_kwargs(self):
330 return dict(**super()._reduce_kwargs(), butler3=self.
butler3, instrument=self.
instrument)
332 def _populateSkyMapDicts(self, name, instance):
333 struct =
ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
338 """Return `True` if configuration indicates that the given dataset type
341 This method is intended to be called primarily by the
342 `RepoConverter` instances used internally by the task.
347 Name of the dataset type.
352 Whether the dataset should be included in the conversion.
355 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
356 for pattern
in self.config.datasetIncludePatterns)
357 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
358 for pattern
in self.config.datasetIgnorePatterns)
361 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
362 """Indicate that a repository uses the given SkyMap.
364 This method is intended to be called primarily by the
365 `RepoConverter` instances used internally by the task.
369 skyMap : `lsst.skymap.BaseSkyMap`
370 SkyMap instance being used, typically retrieved from a Gen2
373 The name of the Gen2 skymap, for error reporting.
378 The name of the skymap in Gen3 data IDs.
383 Raised if the specified skymap cannot be found.
385 sha1 = skyMap.getSha1()
390 except KeyError
as err:
391 msg = f
"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
392 raise LookupError(msg)
from err
397 """Register all skymaps that have been marked as used.
399 This method is intended to be called primarily by the
400 `RepoConverter` instances used internally by the task.
404 subset : `ConversionSubset`, optional
405 Object that will be used to filter converted datasets by data ID.
406 If given, it will be updated with the tracts of this skymap that
407 overlap the visits in the subset.
411 struct.instance.register(struct.name, self.
registry)
412 if subset
is not None and self.config.relatedOnly:
413 subset.addSkyMap(self.
registry, struct.name)
416 """Indicate that a repository uses the given SkyPix dimension.
418 This method is intended to be called primarily by the
419 `RepoConverter` instances used internally by the task.
423 dimension : `lsst.daf.butler.SkyPixDimension`
424 Dimension representing a pixelization of the sky.
429 """Register all skypix dimensions that have been marked as used.
431 This method is intended to be called primarily by the
432 `RepoConverter` instances used internally by the task.
436 subset : `ConversionSubset`, optional
437 Object that will be used to filter converted datasets by data ID.
438 If given, it will be updated with the pixelization IDs that
439 overlap the visits in the subset.
441 if subset
is not None and self.config.relatedOnly:
443 subset.addSkyPix(self.
registry, dimension)
445 def run(self, root: str, *,
446 calibs: Dict[str, str] =
None,
448 visits: Optional[Iterable[int]] =
None,
449 pool: Optional[Pool] =
None,
451 """Convert a group of related data repositories.
456 Complete path to the root Gen2 data repository. This should be
457 a data repository that includes a Gen2 registry and any raw files
458 and/or reference catalogs.
460 Dictionary mapping calibration repository path to the
461 `~lsst.daf.butler.CollectionType.CALIBRATION` collection that
462 converted datasets within it should be certified into.
463 reruns : `list` of `Rerun`
464 Specifications for rerun (processing output) collections to
466 visits : iterable of `int`, optional
467 The integer IDs of visits to convert. If not provided, all visits
468 in the Gen2 root repository will be converted.
469 pool : `multiprocessing.Pool`, optional
470 If not `None`, a process pool with which to parallelize some
472 processes : `int`, optional
473 The number of processes to use for conversion.
475 if pool
is None and processes > 1:
476 pool = Pool(processes)
479 if visits
is not None:
482 if self.config.relatedOnly:
483 self.log.warn(
"config.relatedOnly is True but all visits are being ingested; "
484 "no filtering will be done.")
490 converters.append(rootConverter)
491 for calibRoot, collection
in calibs.items():
492 if not os.path.isabs(calibRoot):
493 calibRoot = os.path.join(rootConverter.root, calibRoot)
496 mapper=rootConverter.mapper,
497 subset=rootConverter.subset)
498 converters.append(converter)
502 if not os.path.isabs(runRoot):
503 runRoot = os.path.join(rootConverter.root, runRoot)
505 instrument=self.
instrument, subset=rootConverter.subset)
506 converters.append(converter)
507 rerunConverters[spec.runName] = converter
510 if self.config.doRegisterInstrument:
515 rootConverter.runRawIngest(pool=pool)
523 if self.config.doWriteCuratedCalibrations:
524 butler3 = Butler3(butler=self.
butler3)
527 calibCollections = set()
528 for collection
in calibs.values():
530 calibCollections.add(collection)
538 defaultCalibCollection = self.
instrument.makeCollectionName(
"calib")
539 if defaultCalibCollection
not in calibCollections:
544 rootConverter.runDefineVisits(pool=pool)
547 for converter
in converters:
561 for converter
in converters:
562 converter.findDatasets()
565 for converter
in converters:
566 converter.expandDataIds()
569 for converter
in converters:
573 for converter
in converters:
578 if spec.chainName
is not None:
579 self.
butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
580 chain = [spec.runName]
581 chain.extend(rerunConverters[spec.runName].getCollectionChain())
582 for parent
in spec.parents:
584 parentConverter = rerunConverters.get(parent)
585 if parentConverter
is not None:
586 chain.extend(parentConverter.getCollectionChain())
587 chain.extend(rootConverter.getCollectionChain())
588 self.log.info(
"Defining %s from chain %s.", spec.chainName, chain)
589 self.
butler3.registry.setCollectionChain(spec.chainName, chain)