# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RootRepoConverter"]

import os
import re
import itertools
from typing import TYPE_CHECKING, Dict, Iterator, Mapping, Optional, Tuple, List

from lsst.skymap import BaseSkyMap
from lsst.daf.butler import CollectionType, DatasetType, DatasetRef, DimensionGraph, FileDataset
from .standardRepoConverter import StandardRepoConverter

SKYMAP_DATASET_TYPES = {
    coaddName: f"{coaddName}Coadd_skyMap" for coaddName in ("deep", "goodSeeing", "dcr")
}
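# e.g. SKYMAP_DATASET_TYPES["deep"] == "deepCoadd_skyMap"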

if TYPE_CHECKING:
    from lsst.daf.butler import SkyPixDimension


def getDataPaths(dataRefs):
    """Strip HDU identifiers from paths and return a unique set of paths.

    Parameters
    ----------
    dataRefs : iterable of `lsst.daf.persistence.ButlerDataRef`
        The gen2 datarefs to strip "[HDU]" values from.

    Returns
    -------
    paths : `set` [`str`]
        The unique file paths without appended "[HDU]".
    """
    paths = set()
    for dataRef in dataRefs:
        path = dataRef.getUri()
        # Handle FITS files with multiple HDUs (e.g. DECam raws), whose Gen2
        # URIs carry a trailing "[HDU]" suffix.
        paths.add(path.split('[')[0])
    return paths
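
# A minimal usage sketch for getDataPaths; ``_StubRef`` is a hypothetical
# stand-in for a Gen2 ButlerDataRef whose getUri() returns a path with a
# trailing HDU suffix:
#
#     >>> class _StubRef:
#     ...     def __init__(self, uri):
#     ...         self.uri = uri
#     ...     def getUri(self):
#     ...         return self.uri
#     >>> getDataPaths([_StubRef("raw/exp0.fits.fz[1]"), _StubRef("raw/exp0.fits.fz[2]")])
#     {'raw/exp0.fits.fz'}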


class RootRepoConverter(StandardRepoConverter):
    """A specialization of `RepoConverter` for root data repositories.

    `RootRepoConverter` adds support for raw images (mostly delegated to the
    parent task's `RawIngestTask` subtask) and reference catalogs.

    Parameters
    ----------
    **kwds
        Keyword arguments are forwarded to (and required by) `RepoConverter`.
    """

    def __init__(self, **kwds):
        super().__init__(run=None, **kwds)
        self._refCats: Dict[str, SkyPixDimension] = {}
        if self.task.config.rootSkyMapName is not None:
            self._rootSkyMap = self.task.config.skyMaps[self.task.config.rootSkyMapName].skyMap.apply()
        else:
            self._rootSkyMap = None  # All access to _rootSkyMap is guarded.
        self._rawRefs = []

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        return (
            super().isDatasetTypeSpecial(datasetTypeName)
            or datasetTypeName in ("raw", "ref_cat", "ref_cat_config")
            # In Gen2, some of these are in the root repo, not a calib repo.
            or datasetTypeName in self.instrument.getCuratedCalibrationNames()
        )

    def getSpecialDirectories(self) -> List[str]:
        # Docstring inherited from RepoConverter.
        return super().getSpecialDirectories() + ["CALIB", "ref_cats", "rerun"]

    def findMatchingSkyMap(self, datasetTypeName: str) -> Tuple[Optional[BaseSkyMap], Optional[str]]:
        # Docstring inherited from StandardRepoConverter.findMatchingSkyMap.
        skyMap, name = super().findMatchingSkyMap(datasetTypeName)
        if skyMap is None and self.task.config.rootSkyMapName is not None:
            self.task.log.debug(
                "Assuming configured root skymap with name '%s' for dataset %s.",
                self.task.config.rootSkyMapName, datasetTypeName
            )
            skyMap = self._rootSkyMap
            name = self.task.config.rootSkyMapName
        return skyMap, name

    def runRawIngest(self, pool=None):
        if self.task.raws is None:
            return
        self.task.log.info("Finding raws in root %s.", self.root)
        if self.subset is not None:
            dataRefs = itertools.chain.from_iterable(
                self.butler2.subset(self.task.config.rawDatasetType, visit=visit)
                for visit in self.subset.visits
            )
        else:
            dataRefs = self.butler2.subset(self.task.config.rawDatasetType)
        dataPaths = getDataPaths(dataRefs)
        self.task.log.info("Ingesting raws from root %s into run %s.", self.root, self.task.raws.butler.run)
        self._rawRefs.extend(self.task.raws.run(dataPaths, pool=pool))
        self._chain = [self.task.raws.butler.run]

    def runDefineVisits(self, pool=None):
        if self.task.defineVisits is None:
            return
        dimensions = DimensionGraph(self.task.universe, names=["exposure"])
        # Collapse each raw's data ID to just its exposure dimensions so each
        # exposure is defined exactly once.
        exposureDataIds = set(ref.dataId.subset(dimensions) for ref in self._rawRefs)
        self.task.log.info("Defining visits from exposures.")
        self.task.defineVisits.run(exposureDataIds, pool=pool)

    def prep(self):
        # Docstring inherited from RepoConverter.
        # Gather information about reference catalogs.
        if self.task.isDatasetTypeIncluded("ref_cat") and len(self.task.config.refCats) != 0:
            from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
            for refCat in os.listdir(os.path.join(self.root, "ref_cats")):
                path = os.path.join(self.root, "ref_cats", refCat)
                configFile = os.path.join(path, "config.py")
                if not os.path.exists(configFile):
                    continue
                if refCat not in self.task.config.refCats:
                    continue
                self.task.log.info("Preparing ref_cat %s from root %s.", refCat, self.root)
                onDiskConfig = RefCatDatasetConfig()
                onDiskConfig.load(configFile)
                if onDiskConfig.indexer.name != "HTM":
                    raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                     f"pixelization '{onDiskConfig.indexer.name}'.")
                level = onDiskConfig.indexer["HTM"].depth
                try:
                    dimension = self.task.universe[f"htm{level}"]
                except KeyError as err:
                    raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no "
                                     f"htm{level} skypix dimension is configured for this "
                                     f"registry.") from err
                self.task.useSkyPix(dimension)
                self._refCats[refCat] = dimension
        if self.task.isDatasetTypeIncluded("brightObjectMask") and self.task.config.rootSkyMapName:
            self.task.useSkyMap(self._rootSkyMap, self.task.config.rootSkyMapName)
        super().prep()
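
    # The Gen2 on-disk layout assumed above and in iterDatasets (illustrative):
    #
    #     <root>/ref_cats/<refCat>/config.py    # indexer configuration
    #     <root>/ref_cats/<refCat>/<htmId>.fits  # one shard per HTM pixel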

    def iterDatasets(self) -> Iterator[FileDataset]:
        # Docstring inherited from RepoConverter.
        # Iterate over reference catalog files.
        for refCat, dimension in self._refCats.items():
            datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                      storageClass="SimpleCatalog")
            if self.subset is None:
                # No spatial subset: scan the refcat directory for shard
                # files, which are named "<htmId>.fits".
                regex = re.compile(r"(\d+)\.fits")
                for fileName in self.progress.wrap(
                        os.listdir(os.path.join(self.root, "ref_cats", refCat)),
                        desc=f"Processing refcat {refCat}"):
                    m = regex.match(fileName)
                    if m is not None:
                        htmId = int(m.group(1))
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, fileName),
                                          refs=DatasetRef(datasetType, dataId))
            else:
                # Spatial subset: construct the expected shard file names
                # directly from the overlapping HTM ID ranges.
                for begin, end in self.progress.wrap(self.subset.skypix[dimension],
                                                     desc=f"Processing ranges for refcat {refCat}"):
                    for htmId in range(begin, end):
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat,
                                                            f"{htmId}.fits"),
                                          refs=DatasetRef(datasetType, dataId))
        yield from super().iterDatasets()

    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
        # Docstring inherited from RepoConverter.
        if datasetTypeName in self._refCats:
            # Reference catalogs go into a dedicated RUN collection
            # (e.g. "refcats/gen2"; see _finish below).
            return self.instrument.makeRefCatCollectionName("gen2")
        return super().getRun(datasetTypeName, calibDate)

    def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
                count: int) -> None:
        # Docstring inherited from RepoConverter.
        super()._finish(datasets, count)
        if self._refCats:
            # Set up a CHAINED collection named something like "refcats" to
            # also point to "refcats/gen2".  It's conceivable (but unlikely)
            # that "refcats/gen2" might not exist, if the scanner saw
            # reference catalog datasets on disk but none overlapped the area
            # of interest, so we register that here, too (multiple
            # registrations of collections are fine).
            chained = self.instrument.makeRefCatCollectionName()
            child = self.instrument.makeRefCatCollectionName("gen2")
            self.task.registry.registerCollection(chained, CollectionType.CHAINED)
            self.task.registry.registerCollection(child, CollectionType.RUN)
            children = list(self.task.registry.getCollectionChain(chained))
            children.append(child)
            self.task.registry.setCollectionChain(chained, children)
            # Also add "refcats" to the list of collections that contains
            # everything found in the root repo.  Normally this is done in
            # getRun, but here we want to add the (possibly new) CHAINED
            # collection instead of the RUN collection.
            self._chain.append(chained)
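
    # Resulting collection layout (a hedged sketch; actual names come from
    # Instrument.makeRefCatCollectionName):
    #
    #     refcats (CHAINED)
    #       └── refcats/gen2 (RUN)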