__all__ = ("RepoConverter", "DataIdExtractor")

import os
import pickle
from collections import OrderedDict

import yaml

import lsst.daf.persistence.repositoryCfg  # noqa: F401 (imported for YAML support for repositoryCfg files)
from lsst.daf.butler import DataId, DatasetType, DatasetRef
from lsst.daf.butler.gen2convert import FilePathParser, Translator
from lsst.log import Log
from lsst.log.utils import temporaryLogLevel
from lsst.utils import doImport
41 """Find the mapper class associated with a Gen2 data repository root. 46 Path to a Gen2 repository root directory. 51 A subclass of `lsst.obs.base.CameraMapper`. 56 Raised if the directory does not appear to be the root of a 59 cfgPath = os.path.join(root,
"repositoryCfg.yaml")
60 if os.path.exists(cfgPath):
61 with open(cfgPath,
"r") as f: 62 repoCfg = yaml.load(f, Loader=yaml.UnsafeLoader) 64 parentLinkPath = os.path.join(root,
"_parent")
65 if os.path.exists(parentLinkPath):
67 mapperFilePath = os.path.join(root,
"_mapper")
68 if os.path.exists(mapperFilePath):
69 with open(mapperFilePath,
"r") as f: 70 mapperClassPath = f.read().strip() 71 return doImport(mapperClassPath)
72 calibRegistryPath = os.path.join(root,
"calibRegistry.sqlite3")
73 if os.path.exists(calibRegistryPath):
74 return findMapperClass(os.path.normpath(os.path.join(root, os.path.pardir)))
75 raise ValueError(f
"Could not determine (Gen2) mapper class for repo at '{root}'.")
79 """A class that extracts Gen3 data IDs from Gen2 filenames for a 80 particular dataset type. 84 datasetTypeName : `str` 85 Name of the dataset type the object will process. 86 storageClass : `str` or `lsst.daf.butler.StorageClass` 87 Gen3 storage class of the dataset type. 88 universe : `lsst.daf.butler.DimensionUniverse` 89 Object containing all dimension definitions. 91 Key-value pairs that may need to appear in the Gen3 data ID, but can 92 never be inferred from a Gen2 filename. This should always include 93 the instrument name (even Gen3 data IDs that don't involve the 94 instrument dimension have instrument-dependent Gen2 filenames) and 95 should also include the skymap name for any data ID that involves 97 filePathParser : `lsst.daf.butler.gen2convert.FilePathParser`, optional 98 Object responsible for reading a Gen2 data ID from a filename. Will 99 be created from ``mapper`` if not provided. 100 translator : `lsst.daf.butler.gen2convert.Translator`, optional 101 Object responsible for converting a Gen2 data ID into a Gen3 data ID. 102 Will be created if not provided. 103 mapper : `lsst.obs.base.CameraMapper`, optional 104 Object that defines Gen2 filename templates. Must be provided if 105 ``filePathParser`` is not. 106 skyMap : `lsst.skymap.BaseSkyMap`, optional 107 SkyMap that defines tracts and patches. Must be provided for datasets 108 with a ``patch`` key in their data IDs. 111 def __init__(self, datasetTypeName, storageClass, *, universe, baseDataId,
112 filePathParser=None, translator=None, mapper=None, skyMap=None):
113 if filePathParser
is None:
114 filePathParser = FilePathParser.fromMapping(mapper.mappings[datasetTypeName])
116 if translator
is None:
117 translator = Translator.makeMatching(filePathParser.datasetType, baseDataId, skyMap=skyMap)
120 storageClass=storageClass, universe=universe)
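
    # A brief usage sketch (instrument, storage class, and file path are
    # illustrative assumptions, not values defined in this module):
    #
    #     extractor = DataIdExtractor(
    #         "calexp", "ExposureF",
    #         universe=butler.registry.dimensions,
    #         baseDataId={"instrument": "HSC"},
    #         mapper=mapper,
    #     )
    #     dataId = extractor.apply("calexp/00123/HSC-R/corr/CORR-0012345-042.fits")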
123 """Extract a Gen3 data ID from the given filename, 127 fileNameInRoot : `str` 128 Filename relative to a Gen2 data repository root. 132 dataId : `lsst.daf.butler.DataId` or `None` 133 The Gen3 data ID, or `None` if the file was not recognized as an 134 instance of the extractor's dataset type. 143 """A helper class that ingests (some of) the contents of a Gen2 data 144 repository into a Gen3 data repository. 149 Root of the Gen2 data repository. 150 universe : `lsst.daf.butler.DimensionUniverse` 151 Object containing all dimension definitions. 153 Key-value pairs that may need to appear in the Gen3 data ID, but can 154 never be inferred from a Gen2 filename. This should always include 155 the instrument name (even Gen3 data IDs that don't involve the 156 instrument dimension have instrument-dependent Gen2 filenames) and 157 should also include the skymap name in order to process any data IDs 158 that involve tracts or patches. 159 mapper : `lsst.obs.base.CameraMapper`, optional 160 Object that defines Gen2 filename templates. Will be identified, 161 imported, and constructed from ``root`` if not provided. 162 skyMap : `lsst.skymap.BaseSkyMap`, optional 163 SkyMap that defines tracts and patches. Must be provided in order to 164 provess datasets with a ``patch`` key in their data IDs. 167 COADD_NAMES = (
"deep",
"goodSeeing",
"dcr")
    REPO_ROOT_FILES = ("registry.sqlite3", "_mapper", "repositoryCfg.yaml",
                       "calibRegistry.sqlite3", "_parent")

    def __init__(self, root, *, universe, baseDataId, mapper=None, skyMap=None):
        self.root = root
        self.universe = universe
        if mapper is None:
            # Mapper construction is chatty; silence it while probing.
            with temporaryLogLevel("CameraMapper", Log.ERROR):
                with temporaryLogLevel("HscMapper", Log.ERROR):
                    cls = findMapperClass(root)
                    mapper = cls(root=root)
        self.mapper = mapper
        self.baseDataId = baseDataId
        if "skymap" in baseDataId:
            # Look for a persisted SkyMap of any known coadd flavor in the
            # repository itself.
            for name in self.COADD_NAMES:
                mapping = self.mapper.mappings.get(f"{name}Coadd_skyMap", None)
                if mapping is None:
                    continue
                filename = os.path.join(self.root, mapping.template)
                if os.path.exists(filename):
                    if skyMap is not None:
                        raise ValueError("Multiple SkyMaps found in repository; please use multiple "
                                         "RepoConverters with an explicit skyMap argument for each.")
                    with open(filename, "rb") as f:
                        skyMap = pickle.load(f, encoding="latin1")
        self.skyMap = skyMap
        self.extractors = OrderedDict()
202 """Add a dataset type to those recognized by the converter. 206 datasetTypeName : `str` 207 String name of the dataset type. 208 storageClass : `str` or `lsst.daf.butler.StorageClass` 209 Gen3 storage class of the dataset type. 213 extractor : `DataIdExtractor` 214 The object that will be used to extract data IDs for instances of 215 this dataset type (also held internally, so the return value can 224 """Extract a Gen3 `~lsst.daf.butler.DatasetRef` from a filename in a 225 Gen2 data repository. 229 fileNameInRoot : `str` 230 Name of the file, relative to the root of its Gen2 repository. 234 ref : `lsst.daf.butler.DatasetRef` or `None` 235 Reference to the Gen3 dataset that would be created by converting 236 this file, or `None` if the file is not recognized as an instance 237 of a dataset type known to this converter. 239 for datasetTypeName, extractor
in self.
extractors.items():
240 dataId = extractor.apply(fileNameInRoot)
241 if dataId
is not None:
245 self.
extractors.move_to_end(datasetTypeName, last=
False)
246 return DatasetRef(extractor.datasetType, dataId=dataId)
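
    # A brief usage sketch (dataset types and file path are illustrative
    # assumptions): register dataset types, then probe individual files.
    #
    #     converter.addDatasetType("calexp", "ExposureF")
    #     converter.addDatasetType("src", "SourceCatalog")
    #     ref = converter.extractDatasetRef("calexp/00123/HSC-R/corr/CORR-0012345-042.fits")
    #     if ref is not None:
    #         print(ref.datasetType.name, ref.dataId)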
250 """Recursively a (subset of) a Gen2 data repository, yielding files 251 that may be convertible. 255 directory : `str`, optional 256 A subdirectory of the repository root to process, instead of 257 processing the entire repository. 258 skipDirs : sequence of `str` 259 Subdirectories that should be skipped. 263 fileNameInRoot : `str` 264 Name of a file in the repository, relative to the root of the 267 if directory
is None:
268 directory = self.
root 269 for dirPath, subdirNamesInDir, fileNamesInDir
in os.walk(directory, followlinks=
True):
272 def isRepoRoot(dirName):
273 return any(os.path.exists(os.path.join(dirPath, dirName, f))
275 subdirNamesInDir[:] = [d
for d
in subdirNamesInDir
if not isRepoRoot(d)
and d
not in skipDirs]
279 dirPathInRoot = dirPath[len(self.
root) + len(os.path.sep):]
280 for fileNameInDir
in fileNamesInDir:
281 fileNameInRoot = os.path.join(dirPathInRoot, fileNameInDir)
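
    # Sketch: enumerate candidate files without ingesting anything (the
    # skipDirs value is an illustrative assumption).
    #
    #     for fileNameInRoot in converter.walkRepo(skipDirs=("CALIB",)):
    #         print(fileNameInRoot)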

    def convertRepo(self, butler, *, directory=None, transfer=None, formatter=None, skipDirs=()):
        """Ingest all recognized files into a Gen3 repository.

        Parameters
        ----------
        butler : `lsst.daf.butler.Butler`
            Gen3 butler that files should be ingested into.
        directory : `str`, optional
            A subdirectory of the repository root to process, instead of
            processing the entire repository.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the file.
        formatter : `lsst.daf.butler.Formatter`, optional
            Formatter that should be used to retrieve the Dataset.  If not
            provided, the formatter will be constructed according to
            Datastore configuration.  This should only be used when converting
            only a single dataset type or multiple dataset types of the same
            storage class.
        skipDirs : sequence of `str`
            Subdirectories that should be skipped.
        """
        log = Log.getLogger("RepoConverter")
        for extractor in self.extractors.values():
            butler.registry.registerDatasetType(extractor.datasetType)
        skipped = {}
        for file in self.walkRepo(directory=directory, skipDirs=skipDirs):
            ref = self.extractDatasetRef(file)
            if ref is not None:
                try:
                    butler.ingest(os.path.join(self.root, file), ref, transfer=transfer,
                                  formatter=formatter)
                except Exception as err:
                    skipped.setdefault(type(err), []).append(str(err))
        for cls, messages in skipped.items():
            log.warn("Skipped %s files due to exceptions of type %s.", len(messages), cls.__name__)
            if log.isDebugEnabled():
                for message in messages:
                    log.debug(message)
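

# End-to-end sketch of how this class is meant to be used (paths, instrument,
# and dataset types are illustrative assumptions, not values defined in this
# module):
#
#     from lsst.daf.butler import Butler
#
#     butler = Butler("/repo/gen3", run="conversion/run1")
#     converter = RepoConverter("/repo/gen2/rerun/my-rerun",
#                               universe=butler.registry.dimensions,
#                               baseDataId={"instrument": "HSC"})
#     converter.addDatasetType("calexp", "ExposureF")
#     converter.convertRepo(butler, transfer="symlink")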