21 from __future__
import annotations
23 __all__ = [
"ConvertRepoConfig",
"ConvertRepoTask",
"ConvertRepoSkyMapConfig",
"Rerun"]
27 from dataclasses
import dataclass
28 from typing
import Iterable, Optional, List, Dict
30 from lsst.daf.butler
import (
35 from lsst.pex.config
import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
36 from lsst.pipe.base
import Task
37 from lsst.skymap
import skyMapRegistry, BaseSkyMap
39 from ..ingest
import RawIngestTask
40 from ..defineVisits
import DefineVisitsTask
41 from .repoConverter
import ConversionSubset
42 from .rootRepoConverter
import RootRepoConverter
43 from .calibRepoConverter
import CalibRepoConverter
44 from .standardRepoConverter
import StandardRepoConverter
45 from .._instrument
import Instrument
50 """Struct containing information about a skymap that may appear in a Gen2
55 """Name of the skymap used in Gen3 data IDs.
59 """Hash computed by `BaseSkyMap.getSha1`.
63 """Name of the skymap used in Gen3 data IDs.
67 """Whether this skymap has been found in at least one repository being
74 """Specification for a Gen2 processing-output repository to convert.
78 """Absolute or relative (to the root repository) path to the Gen2
83 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets
84 will be inserted into (`str`).
87 chainName: Optional[str]
88 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will
89 combine this repository's datasets with those of its parent repositories
94 """Collection names associated with parent repositories, used to define the
95 chained collection (`list` [ `str` ]).
97 Ignored if `chainName` is `None`. Runs used in the root repo are
98 automatically included.
103 """Sub-config used to hold the parameters of a SkyMap.
107 This config only needs to exist because we can't put a
108 `~lsst.pex.config.RegistryField` directly inside a
109 `~lsst.pex.config.ConfigDictField`.
111 It needs to have its only field named "skyMap" for compatibility with the
112 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
113 use one config file in an obs package to configure both.
115 This name leads to unfortunate repetition with the field named
116 "skymap" that holds it - "skyMap[name].skyMap" - but that seems
119 skyMap = skyMapRegistry.makeField(
120 doc=
"Type and parameters for the SkyMap itself.",
126 raws = ConfigurableField(
127 "Configuration for subtask responsible for ingesting raws and adding "
128 "exposure dimension entries.",
129 target=RawIngestTask,
131 defineVisits = ConfigurableField(
132 "Configuration for the subtask responsible for defining visits from "
134 target=DefineVisitsTask,
136 skyMaps = ConfigDictField(
137 "Mapping from Gen3 skymap name to the parameters used to construct a "
138 "BaseSkyMap instance. This will be used to associate names with "
139 "existing skymaps found in the Gen2 repo.",
141 itemtype=ConvertRepoSkyMapConfig,
144 rootSkyMapName = Field(
145 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
146 "datasets in the root repository when no SkyMap is found there. ",
152 "A mapping from dataset type name to the RUN collection they should "
153 "be inserted into. This must include all datasets that can be found "
154 "in the root repository; other repositories will use per-repository "
159 "deepCoadd_skyMap":
"skymaps",
160 "brightObjectMask":
"masks",
163 storageClasses = DictField(
164 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
165 "or 'persistable') to the Gen3 StorageClass name.",
172 "defects":
"Defects",
173 "crosstalk":
"CrosstalkCalib",
174 "BaseSkyMap":
"SkyMap",
175 "BaseCatalog":
"Catalog",
176 "BackgroundList":
"Background",
178 "MultilevelParquetTable":
"DataFrame",
179 "ParquetTable":
"DataFrame",
183 formatterClasses = DictField(
184 "Mapping from dataset type name to formatter class. "
185 "By default these are derived from the formatters listed in the"
186 " Gen3 datastore configuration.",
191 targetHandlerClasses = DictField(
192 "Mapping from dataset type name to target handler class.",
197 doRegisterInstrument = Field(
198 "If True (default), add dimension records for the Instrument and its "
199 "filters and detectors to the registry instead of assuming they are "
204 doWriteCuratedCalibrations = Field(
205 "If True (default), ingest human-curated calibrations directly via "
206 "the Instrument interface. Note that these calibrations are never "
207 "converted from Gen2 repositories.",
212 "The names of reference catalogs (subdirectories under ref_cats) to "
217 fileIgnorePatterns = ListField(
218 "Filename globs that should be ignored instead of being treated as "
221 default=[
"README.txt",
"*~?",
"butler.yaml",
"gen3.sqlite3",
222 "registry.sqlite3",
"calibRegistry.sqlite3",
"_mapper",
223 "_parent",
"repositoryCfg.yaml"]
225 rawDatasetType = Field(
226 "Gen2 dataset type to use for raw data.",
230 datasetIncludePatterns = ListField(
231 "Glob-style patterns for dataset type names that should be converted.",
235 datasetIgnorePatterns = ListField(
236 "Glob-style patterns for dataset type names that should not be "
237 "converted despite matching a pattern in datasetIncludePatterns.",
242 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
247 "If True (default), only convert datasets that are related to the "
248 "ingested visits. Ignored unless a list of visits is passed to "
256 return self.
raws.transfer
260 self.
raws.transfer = value
270 """A task that converts one or more related Gen2 data repositories to a
271 single Gen3 data repository (with multiple collections).
275 config: `ConvertRepoConfig`
276 Configuration for this task.
277 butler3: `lsst.daf.butler.Butler`
278 A writeable Gen3 Butler instance that represents the data repository
279 that datasets will be ingested into. If the 'raw' dataset is
280 configured to be included in the conversion, ``butler3.run`` should be
281 set to the name of the collection raws should be ingested into, and
282 ``butler3.collections`` should include a calibration collection from
283 which the ``camera`` dataset can be loaded, unless a calibration repo
284 is converted and ``doWriteCuratedCalibrations`` is `True`.
286 Other keyword arguments are forwarded to the `Task` constructor.
290 Most of the work of converting repositories is delegated to instances of
291 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds
292 only state that is relevant for all Gen2 repositories being ingested, while
293 each `RepoConverter` instance holds only state relevant for the conversion
294 of a single Gen2 repository. Both the task and the `RepoConverter`
295 instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
296 methods may only be called once on a particular instance.
299 ConfigClass = ConvertRepoConfig
301 _DefaultName =
"convertRepo"
303 def __init__(self, config=None, *, butler3: Butler3, instrument: Instrument, **kwargs):
310 self.makeSubtask(
"raws", butler=butler3)
311 self.makeSubtask(
"defineVisits", butler=butler3)
318 for name, config
in self.config.skyMaps.items():
319 instance = config.skyMap.apply()
325 def _populateSkyMapDicts(self, name, instance):
326 struct =
ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
331 """Return `True` if configuration indicates that the given dataset type
334 This method is intended to be called primarily by the
335 `RepoConverter` instances used internally by the task.
340 Name of the dataset type.
345 Whether the dataset should be included in the conversion.
348 any(fnmatch.fnmatchcase(datasetTypeName, pattern)
349 for pattern
in self.config.datasetIncludePatterns)
350 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
351 for pattern
in self.config.datasetIgnorePatterns)
354 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
355 """Indicate that a repository uses the given SkyMap.
357 This method is intended to be called primarily by the
358 `RepoConverter` instances used internally by the task.
362 skyMap : `lsst.skymap.BaseSkyMap`
363 SkyMap instance being used, typically retrieved from a Gen2
366 The name of the gen2 skymap, for error reporting.
371 The name of the skymap in Gen3 data IDs.
376 Raised if the specified skymap cannot be found.
378 sha1 = skyMap.getSha1()
383 except KeyError
as err:
384 msg = f
"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
385 raise LookupError(msg)
from err
390 """Register all skymaps that have been marked as used.
392 This method is intended to be called primarily by the
393 `RepoConverter` instances used internally by the task.
397 subset : `ConversionSubset`, optional
398 Object that will be used to filter converted datasets by data ID.
399 If given, it will be updated with the tracts of this skymap that
400 overlap the visits in the subset.
404 struct.instance.register(struct.name, self.
registry)
405 if subset
is not None and self.config.relatedOnly:
406 subset.addSkyMap(self.
registry, struct.name)
409 """Indicate that a repository uses the given SkyPix dimension.
411 This method is intended to be called primarily by the
412 `RepoConverter` instances used internally by the task.
416 dimension : `lsst.daf.butler.SkyPixDimension`
417 Dimension representing a pixelization of the sky.
422 """Register all skypix dimensions that have been marked as used.
424 This method is intended to be called primarily by the
425 `RepoConverter` instances used internally by the task.
429 subset : `ConversionSubset`, optional
430 Object that will be used to filter converted datasets by data ID.
431 If given, it will be updated with the pixelization IDs that
432 overlap the visits in the subset.
434 if subset
is not None and self.config.relatedOnly:
436 subset.addSkyPix(self.
registry, dimension)
438 def run(self, root: str, *,
439 calibs: Dict[str, str] =
None,
441 visits: Optional[Iterable[int]] =
None):
442 """Convert a group of related data repositories.
447 Complete path to the root Gen2 data repository. This should be
448 a data repository that includes a Gen2 registry and any raw files
449 and/or reference catalogs.
451 Dictionary mapping calibration repository path to the
452 `~lsst.daf.butler.CollectionType.CALIBRATION` collection that
453 converted datasets within it should be certified into.
454 reruns : `list` of `Rerun`
455 Specifications for rerun (processing output) collections to
457 visits : iterable of `int`, optional
458 The integer IDs of visits to convert. If not provided, all visits
459 in the Gen2 root repository will be converted.
463 if visits
is not None:
466 if self.config.relatedOnly:
467 self.log.warn(
"config.relatedOnly is True but all visits are being ingested; "
468 "no filtering will be done.")
474 converters.append(rootConverter)
475 for calibRoot, collection
in calibs.items():
476 if not os.path.isabs(calibRoot):
477 calibRoot = os.path.join(rootConverter.root, calibRoot)
480 mapper=rootConverter.mapper,
481 subset=rootConverter.subset)
482 converters.append(converter)
486 if not os.path.isabs(runRoot):
487 runRoot = os.path.join(rootConverter.root, runRoot)
489 instrument=self.
instrument, subset=rootConverter.subset)
490 converters.append(converter)
491 rerunConverters[spec.runName] = converter
494 if self.config.doRegisterInstrument:
504 rootConverter.runRawIngest()
512 if self.config.doWriteCuratedCalibrations:
513 butler3 = Butler3(butler=self.
butler3)
516 calibCollections = set()
517 for collection
in calibs.values():
519 calibCollections.add(collection)
527 defaultCalibCollection = self.
instrument.makeCollectionName(
"calib")
528 if defaultCalibCollection
not in calibCollections:
533 rootConverter.runDefineVisits()
536 for converter
in converters:
550 for converter
in converters:
551 converter.findDatasets()
554 for converter
in converters:
555 converter.expandDataIds()
558 for converter
in converters:
562 for converter
in converters:
if spec.chainName is not None:
    # Define a CHAINED collection for this rerun only when one was requested.
    self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
    # The rerun's own RUN collection is searched first, followed by any
    # additional collections reported by its converter.
    chain = [spec.runName]
    chain.extend(rerunConverters[spec.runName].getCollectionChain())
    for parent in spec.parents:
        # Append the parent collection name being iterated over.  The
        # previous code appended ``spec.parent``, which is not the loop
        # variable and is not an attribute used anywhere else here.
        chain.append(parent)
        # A parent may or may not be one of the reruns converted in this
        # call; only converted parents contribute their own chain.
        parentConverter = rerunConverters.get(parent)
        if parentConverter is not None:
            chain.extend(parentConverter.getCollectionChain())
    # Collections from the root repository always come last.
    chain.extend(rootConverter.getCollectionChain())
    self.log.info("Defining %s from chain %s.", spec.chainName, chain)
    self.butler3.registry.setCollectionChain(spec.chainName, chain)