# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RootRepoConverter"]

import os
import re
import itertools
from typing import TYPE_CHECKING, Iterator, Optional, Tuple, List, Set

from lsst.skymap import BaseSkyMap
from lsst.daf.butler import DatasetType, DatasetRef, FileDataset
from .standardRepoConverter import StandardRepoConverter

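# Dataset type names of the skymaps associated with each supported Gen2 coadd
# flavor, keyed by coadd name.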
SKYMAP_DATASET_TYPES = {
    coaddName: f"{coaddName}Coadd_skyMap" for coaddName in ("deep", "goodSeeing", "dcr")
}

if TYPE_CHECKING:
    from lsst.daf.butler import SkyPixDimension
    from ..ingest import RawExposureData


def getDataPaths(dataRefs):
    """Strip HDU identifiers from paths and return a unique set of paths.

    Parameters
    ----------
    dataRefs : iterable of `lsst.daf.persistence.ButlerDataRef`
        The Gen2 data references to strip "[HDU]" values from.

    Returns
    -------
    paths : `set` [`str`]
        The unique file paths without appended "[HDU]".
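
    Examples
    --------
    A minimal sketch of the intended behavior, using a hypothetical stand-in
    for a Gen2 data reference (only the ``getUri`` method used above is
    assumed):

    >>> class FakeDataRef:
    ...     def __init__(self, uri):
    ...         self._uri = uri
    ...     def getUri(self):
    ...         return self._uri
    >>> refs = [FakeDataRef("/repo/raw/image.fits[1]"),
    ...         FakeDataRef("/repo/raw/image.fits[2]")]
    >>> getDataPaths(refs)
    {'/repo/raw/image.fits'}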

    """
    paths = set()
    for dataRef in dataRefs:
        path = dataRef.getUri()
        # Handle FITS files with multiple HDUs (e.g. DECam raws), whose Gen2
        # URIs end with an "[HDU]" suffix.
        paths.add(path.split('[')[0])
    return paths


class RootRepoConverter(StandardRepoConverter):
    """A specialization of `RepoConverter` for root data repositories.

    `RootRepoConverter` adds support for raw images (mostly delegated to the
    parent task's `RawIngestTask` subtask) and reference catalogs.

    Parameters
    ----------
    kwds
        Keyword arguments are forwarded to (and required by) `RepoConverter`.
    """

    def __init__(self, **kwds):
        super().__init__(run=None, **kwds)
        self._exposureData: List[RawExposureData] = []
        self._refCats: List[Tuple[str, SkyPixDimension]] = []
        if self.task.config.rootSkyMapName is not None:
            self._rootSkyMap = self.task.config.skyMaps[self.task.config.rootSkyMapName].skyMap.apply()
        else:
            self._rootSkyMap = None
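        # Mapping from run name to the set of dataset type names ingested
        # into that run; populated during ingest() and reported by
        # getCollectionChain().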
        self._chain = None

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        return (
            super().isDatasetTypeSpecial(datasetTypeName)
            or datasetTypeName in ("raw", "ref_cat", "ref_cat_config")
            # In Gen2, some of these are in the root repo, not a calib repo.
            or datasetTypeName in self.task.config.curatedCalibrations
        )

    def getSpecialDirectories(self) -> List[str]:
        # Docstring inherited from RepoConverter.
        return super().getSpecialDirectories() + ["CALIB", "ref_cats", "rerun"]

    def findMatchingSkyMap(self, datasetTypeName: str) -> Tuple[Optional[BaseSkyMap], Optional[str]]:
        # Docstring inherited from StandardRepoConverter.findMatchingSkyMap.
        skyMap, name = super().findMatchingSkyMap(datasetTypeName)
        if skyMap is None and self.task.config.rootSkyMapName is not None:
            self.task.log.debug(
                "Assuming configured root skymap with name '%s' for dataset %s.",
                self.task.config.rootSkyMapName, datasetTypeName
            )
            skyMap = self._rootSkyMap
            name = self.task.config.rootSkyMapName
        return skyMap, name

    def prep(self):
        # Docstring inherited from RepoConverter.
        # Gather information about raws.
        if self.task.raws is not None:
            self.task.log.info(f"Preparing raws from root {self.root}.")
            if self.subset is not None:
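                # Query Gen2 separately for each visit in the subset and
                # chain the resulting data references together.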
                dataRefs = itertools.chain.from_iterable(
                    self.butler2.subset(self.task.config.rawDatasetType,
                                        visit=visit) for visit in self.subset.visits
                )
            else:
                dataRefs = self.butler2.subset(self.task.config.rawDatasetType)
            dataPaths = getDataPaths(dataRefs)
            self.task.log.debug("Prepping files: %s", dataPaths)
            self._exposureData.extend(self.task.raws.prep(dataPaths))
        # Gather information about reference catalogs.
        if self.task.isDatasetTypeIncluded("ref_cat") and len(self.task.config.refCats) != 0:
            from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
            for refCat in os.listdir(os.path.join(self.root, "ref_cats")):
                path = os.path.join(self.root, "ref_cats", refCat)
                configFile = os.path.join(path, "config.py")
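                # Directories without a config.py are assumed not to be
                # reference catalogs and are skipped.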
                if not os.path.exists(configFile):
                    continue
                if refCat not in self.task.config.refCats:
                    continue
                self.task.log.info(f"Preparing ref_cat {refCat} from root {self.root}.")
                onDiskConfig = RefCatDatasetConfig()
                onDiskConfig.load(configFile)
                if onDiskConfig.indexer.name != "HTM":
                    raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                     f"pixelization '{onDiskConfig.indexer.name}'.")
                level = onDiskConfig.indexer["HTM"].depth
                try:
                    dimension = self.task.universe[f"htm{level}"]
                except KeyError as err:
                    raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no htm{level} "
                                     f"skypix dimension is configured for this registry.") from err
                self.task.useSkyPix(dimension)
                self._refCats.append((refCat, dimension))
        if self.task.isDatasetTypeIncluded("brightObjectMask") and self.task.config.rootSkyMapName:
            self.task.useSkyMap(self._rootSkyMap, self.task.config.rootSkyMapName)
        super().prep()

    def insertDimensionData(self):
        # Docstring inherited from RepoConverter.
        self.task.log.info(f"Inserting observation dimension records from {self.root}.")
        records = {"visit": [], "exposure": [], "visit_detector_region": []}
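        # Collate the records from every exposure by dimension so they can
        # be inserted in bulk.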
        for exposure in self._exposureData:
            for dimension, recordsForDimension in exposure.records.items():
                records[dimension].extend(recordsForDimension)
        self.task.raws.insertDimensionData(records)

    def iterDatasets(self) -> Iterator[FileDataset]:
        # Docstring inherited from RepoConverter.
        # Iterate over reference catalog files.
        for refCat, dimension in self._refCats:
            datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                      storageClass="SimpleCatalog")
            if self.subset is None:
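                # Without a subset, convert every shard; shard files are
                # named for the HTM index they cover (e.g. "189584.fits").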
                regex = re.compile(r"(\d+)\.fits")
                for fileName in os.listdir(os.path.join(self.root, "ref_cats", refCat)):
                    m = regex.match(fileName)
                    if m is not None:
                        htmId = int(m.group(1))
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, fileName),
                                          refs=DatasetRef(datasetType, dataId))
            else:
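                # With a subset, convert only the shards whose HTM indices
                # fall in the subset's skypix ranges (range ends exclusive).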
                for begin, end in self.subset.skypix[dimension]:
                    for htmId in range(begin, end):
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, f"{htmId}.fits"),
                                          refs=DatasetRef(datasetType, dataId))
        yield from super().iterDatasets()

    def ingest(self):
        # Docstring inherited from RepoConverter.
        self._chain = {}
        if self.task.raws is not None:
            self.task.log.info("Ingesting raws from root %s into run %s.", self.root,
                               self.task.raws.butler.run)
            self.task.registry.registerDatasetType(self.task.raws.datasetType)
            self._chain.setdefault(self.task.raws.butler.run, set()).add(self.task.raws.datasetType.name)
            # We need to delegate to RawIngestTask to actually ingest raws,
            # rather than just including those datasets in iterDatasets for
            # the base class to handle, because we don't want to assume we
            # can use the Datastore-configured Formatter for raw data.
            for exposure in self._exposureData:
                self.task.raws.ingestExposureDatasets(exposure)
        super().ingest()

    def getRun(self, datasetTypeName: str) -> str:
        # Docstring inherited from RepoConverter.
        run = self.task.config.runs[datasetTypeName]
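        # Record the run for this dataset type so getCollectionChain() can
        # report it later.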
        self._chain.setdefault(run, set()).add(datasetTypeName)
        return run

    def getCollectionChain(self) -> List[Tuple[str, Set[str]]]:
        """Return tuples of run name and associated dataset type names that
        can be used to construct a chained collection that refers to the
        converted root repository (`list` [ `tuple` ]).
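
        Examples
        --------
        A hypothetical return value for a conversion that ingested raws into
        one run and a single reference catalog into another (all names below
        are illustrative)::

            [("raw/run", {"raw"}), ("refcats/run", {"my_refcat"})]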

        """
        return list(self._chain.items())