Coverage for python/lsst/obs/base/gen2to3/rootRepoConverter.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RootRepoConverter"]
25import os
26import re
27import itertools
28from typing import TYPE_CHECKING, Iterator, Optional, Tuple, List, Set
30from lsst.skymap import BaseSkyMap
31from lsst.daf.butler import DatasetType, DatasetRef, FileDataset
32from .standardRepoConverter import StandardRepoConverter
# Gen2 skymap dataset type name for each supported coadd flavor.
SKYMAP_DATASET_TYPES = {
    name: f"{name}Coadd_skyMap" for name in ("deep", "goodSeeing", "dcr")
}

if TYPE_CHECKING:
    from lsst.daf.butler import SkyPixDimension
    from ..ingest import RawExposureData
def getDataPaths(dataRefs):
    """Strip HDU identifiers from paths and return a unique set of paths.

    Parameters
    ----------
    dataRefs : `lsst.daf.persistence.ButlerDataRef`
        The gen2 datarefs to strip "[HDU]" values from.

    Returns
    -------
    paths : `set` [`str`]
        The unique file paths without appended "[HDU]".
    """
    # FITS files with multiple HDUs (e.g. decam raw) carry a trailing
    # "[HDU]" suffix; everything before the first '[' is the bare path.
    return {ref.getUri().partition('[')[0] for ref in dataRefs}
class RootRepoConverter(StandardRepoConverter):
    """A specialization of `RepoConverter` for root data repositories.

    `RootRepoConverter` adds support for raw images (mostly delegated to the
    parent task's `RawIngestTask` subtask) and reference catalogs.

    Parameters
    ----------
    kwds
        Keyword arguments are forwarded to (and required by) `RepoConverter`.
    """

    def __init__(self, **kwds):
        # A root conversion has no single output run; runs are resolved
        # per dataset type later, via `getRun`.
        super().__init__(run=None, **kwds)
        # Raw exposure metadata collected by `prep`, consumed by
        # `insertDimensionData` and `ingest`.
        self._exposureData: List[RawExposureData] = []
        # (refCat name, HTM skypix dimension) pairs discovered under
        # the "ref_cats" directory during `prep`.
        self._refCats: List[Tuple[str, SkyPixDimension]] = []
        if self.task.config.rootSkyMapName is not None:
            self._rootSkyMap = self.task.config.skyMaps[self.task.config.rootSkyMapName].skyMap.apply()
        else:
            self._rootSkyMap = None
        # Mapping from run name to the set of dataset type names written
        # to it; initialized in `ingest`, extended by `getRun`, and
        # reported by `getCollectionChain`.
        self._chain = None

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        return (
            super().isDatasetTypeSpecial(datasetTypeName)
            or datasetTypeName in ("raw", "ref_cat", "ref_cat_config")
            # in Gen2, some of these are in the root repo, not a calib repo
            or datasetTypeName in self.task.config.curatedCalibrations
        )

    def getSpecialDirectories(self) -> List[str]:
        # Docstring inherited from RepoConverter.
        return super().getSpecialDirectories() + ["CALIB", "ref_cats", "rerun"]

    def findMatchingSkyMap(self, datasetTypeName: str) -> Tuple[Optional[BaseSkyMap], Optional[str]]:
        # Docstring inherited from StandardRepoConverter.findMatchingSkyMap.
        skyMap, name = super().findMatchingSkyMap(datasetTypeName)
        if skyMap is None and self.task.config.rootSkyMapName is not None:
            # Fall back to the configured root skymap when the repository
            # itself does not provide one for this dataset type.
            self.task.log.debug(
                ("Assuming configured root skymap with name '%s' for dataset %s."),
                self.task.config.rootSkyMapName, datasetTypeName
            )
            skyMap = self._rootSkyMap
            name = self.task.config.rootSkyMapName
        return skyMap, name

    def prep(self):
        # Docstring inherited from RepoConverter.
        # Gather information about raws.
        if self.task.raws is not None:
            self.task.log.info(f"Preparing raws from root {self.root}.")
            if self.subset is not None:
                # Restrict the raw search to the requested visits only.
                dataRefs = itertools.chain.from_iterable(
                    self.butler2.subset(self.task.config.rawDatasetType,
                                        visit=visit) for visit in self.subset.visits
                )
            else:
                dataRefs = self.butler2.subset(self.task.config.rawDatasetType)
            dataPaths = getDataPaths(dataRefs)
            self.task.log.debug("Prepping files: %s", dataPaths)
            self._exposureData.extend(self.task.raws.prep(dataPaths))
        # Gather information about reference catalogs.
        if self.task.isDatasetTypeIncluded("ref_cat") and len(self.task.config.refCats) != 0:
            # Imported here to keep lsst.meas.algorithms an optional
            # dependency when no reference catalogs are converted.
            from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
            for refCat in os.listdir(os.path.join(self.root, "ref_cats")):
                path = os.path.join(self.root, "ref_cats", refCat)
                configFile = os.path.join(path, "config.py")
                if not os.path.exists(configFile):
                    # Not a reference catalog directory.
                    continue
                if refCat not in self.task.config.refCats:
                    # Not one of the catalogs selected for conversion.
                    continue
                self.task.log.info(f"Preparing ref_cat {refCat} from root {self.root}.")
                onDiskConfig = RefCatDatasetConfig()
                onDiskConfig.load(configFile)
                # Only HTM-indexed reference catalogs are supported.
                if onDiskConfig.indexer.name != "HTM":
                    raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                     f"pixelization '{onDiskConfig.indexer.name}'.")
                level = onDiskConfig.indexer["HTM"].depth
                try:
                    dimension = self.task.universe[f"htm{level}"]
                except KeyError as err:
                    raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no htm{level} "
                                     f"skypix dimension is configured for this registry.") from err
                self.task.useSkyPix(dimension)
                self._refCats.append((refCat, dimension))
        if self.task.isDatasetTypeIncluded("brightObjectMask") and self.task.config.rootSkyMapName:
            self.task.useSkyMap(self._rootSkyMap, self.task.config.rootSkyMapName)
        super().prep()

    def insertDimensionData(self):
        # Docstring inherited from RepoConverter.
        self.task.log.info(f"Inserting observation dimension records from {self.root}.")
        # Flatten the per-exposure records gathered in `prep` into one
        # batch per dimension before inserting.
        records = {"visit": [], "exposure": [], "visit_detector_region": []}
        for exposure in self._exposureData:
            for dimension, recordsForDimension in exposure.records.items():
                records[dimension].extend(recordsForDimension)
        self.task.raws.insertDimensionData(records)

    def iterDatasets(self) -> Iterator[FileDataset]:
        # Docstring inherited from RepoConverter.
        # Iterate over reference catalog files.
        for refCat, dimension in self._refCats:
            datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                      storageClass="SimpleCatalog")
            if self.subset is None:
                # No spatial subset: scan the directory for shard files
                # named "<htmId>.fits".
                regex = re.compile(r"(\d+)\.fits")
                for fileName in os.listdir(os.path.join(self.root, "ref_cats", refCat)):
                    m = regex.match(fileName)
                    if m is not None:
                        htmId = int(m.group(1))
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, fileName),
                                          refs=DatasetRef(datasetType, dataId))
            else:
                # Spatial subset: yield only the shards in the requested
                # [begin, end) skypix ranges.
                for begin, end in self.subset.skypix[dimension]:
                    for htmId in range(begin, end):
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, f"{htmId}.fits"),
                                          refs=DatasetRef(datasetType, dataId))
        yield from super().iterDatasets()

    def ingest(self):
        # Docstring inherited from RepoConverter.
        self._chain = {}
        if self.task.raws is not None:
            self.task.log.info("Ingesting raws from root %s into run %s.", self.root,
                               self.task.raws.butler.run)
            self.task.registry.registerDatasetType(self.task.raws.datasetType)
            self._chain.setdefault(self.task.raws.butler.run, set()).add(self.task.raws.datasetType.name)
            # We need to delegate to RawIngestTask to actually ingest raws,
            # rather than just including those datasets in iterDatasets for
            # the base class to handle, because we don't want to assume we
            # can use the Datastore-configured Formatter for raw data.
            for exposure in self._exposureData:
                self.task.raws.ingestExposureDatasets(exposure)
        super().ingest()

    def getRun(self, datasetTypeName: str) -> str:
        # Docstring inherited from RepoConverter.
        run = self.task.config.runs[datasetTypeName]
        # Record the run -> dataset type association for
        # `getCollectionChain`.  NOTE(review): assumes `ingest` has run
        # first (it initializes `self._chain`) — confirm call order.
        self._chain.setdefault(run, set()).add(datasetTypeName)
        return run

    def getCollectionChain(self) -> List[Tuple[str, Set[str]]]:
        """Return tuples of run name and associated dataset type names that
        can be used to construct a chained collection that refers to the
        converted root repository (`list` [ `tuple` ]).
        """
        return list(self._chain.items())