Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

# This file is part of obs_base. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

from __future__ import annotations 

 

__all__ = ["RootRepoConverter"] 

 

import os 

import re 

import itertools 

from typing import TYPE_CHECKING, Iterator, Tuple, List 

 

from lsst.daf.butler import DatasetType, DatasetRef 

from .calibRepoConverter import CURATED_CALIBRATION_DATASET_TYPES 

from .standardRepoConverter import StandardRepoConverter 

 

# Mapping from Gen2 coadd name to the Gen3 dataset type name of the
# corresponding skymap dataset (e.g. "deep" -> "deepCoadd_skyMap").
SKYMAP_DATASET_TYPES = {
    coaddName: f"{coaddName}Coadd_skyMap" for coaddName in ("deep", "goodSeeing", "dcr")
}

# Imports used only in type annotations; guarded so they are never executed
# at runtime (avoids import cycles and unnecessary dependencies).
# NOTE: a coverage-report artifact had been fused into this line; restored to
# plain `if TYPE_CHECKING:`.
if TYPE_CHECKING:
    from lsst.daf.butler import SkyPixDimension
    from ..ingest import RawExposureData

 

 

class RootRepoConverter(StandardRepoConverter):
    """A specialization of `RepoConverter` for root data repositories.

    `RootRepoConverter` adds support for raw images (mostly delegated to the
    parent task's `RawIngestTask` subtask) and reference catalogs.

    Parameters
    ----------
    kwds
        Keyword arguments are forwarded to (and required by) `RepoConverter`.
    """

    def __init__(self, **kwds):
        super().__init__(**kwds)
        # Raw exposure metadata gathered by prep() and later consumed by
        # insertDimensionData() and ingest().
        self._exposureData: List[RawExposureData] = []
        # (catalog name, skypix dimension) pairs for each reference catalog
        # discovered under <root>/ref_cats in prep().
        self._refCats: List[Tuple[str, SkyPixDimension]] = []

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        # Raws and reference catalogs get dedicated handling in this class,
        # so they must be excluded from the generic conversion path.
        return (
            super().isDatasetTypeSpecial(datasetTypeName) or
            datasetTypeName in ("raw", "ref_cat", "ref_cat_config") or
            # in Gen2, some of these are in the root repo, not a calib repo
            datasetTypeName in CURATED_CALIBRATION_DATASET_TYPES
        )

    def isDirectorySpecial(self, subdirectory: str) -> bool:
        # Docstring inherited from RepoConverter.
        # ref_cats/ is walked explicitly by prep()/iterDatasets(), not by the
        # generic directory scan.
        return subdirectory == "ref_cats"

    def prep(self):
        # Docstring inherited from RepoConverter.
        # Gather information about raws.
        if self.task.raws is not None:
            self.task.log.info(f"Preparing raws from root {self.root}.")
            if self.subset is not None:
                # Restrict to the requested visits only.
                dataRefs = itertools.chain.from_iterable(
                    self.butler2.subset("raw", visit=visit) for visit in self.subset.visits
                )
            else:
                dataRefs = self.butler2.subset("raw")
            self._exposureData.extend(self.task.raws.prep(dataRef.getUri() for dataRef in dataRefs))
        # Gather information about reference catalogs.
        if self.task.isDatasetTypeIncluded("ref_cat"):
            # Local import: keeps meas_algorithms an optional dependency,
            # needed only when reference catalogs are actually converted.
            from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
            # NOTE(review): this raises FileNotFoundError if <root>/ref_cats
            # does not exist even though "ref_cat" is included — confirm
            # whether that directory is guaranteed to exist for root repos.
            for refCat in os.listdir(os.path.join(self.root, "ref_cats")):
                self.task.log.info(f"Preparing ref_cat {refCat} from root {self.root}.")
                path = os.path.join(self.root, "ref_cats", refCat)
                configFile = os.path.join(path, "config.py")
                if not os.path.exists(configFile):
                    # Not a reference catalog directory (no indexer config).
                    continue
                if not self.task.isDatasetTypeIncluded(refCat):
                    # While the Gen2 dataset type for reference catalogs is
                    # just "ref_cat", in Gen3 we use the name of the reference
                    # catalog as its dataset type name.
                    continue
                onDiskConfig = RefCatDatasetConfig()
                onDiskConfig.load(configFile)
                # Only HTM pixelization maps onto a Gen3 skypix dimension here.
                if onDiskConfig.indexer.name != "HTM":
                    raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                     f"pixelization '{onDiskConfig.indexer.name}'.")
                level = onDiskConfig.indexer["HTM"].depth
                try:
                    # The registry must have a matching htm<level> dimension
                    # configured for this catalog to be representable.
                    dimension = self.task.universe[f"htm{level}"]
                except KeyError as err:
                    raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no htm{level} "
                                     f"skypix dimension is configured for this registry.") from err
                self.task.useSkyPix(dimension)
                self._refCats.append((refCat, dimension))
        super().prep()

    def insertDimensionData(self):
        # Docstring inherited from RepoConverter.
        self.task.log.info(f"Inserting observation dimension records from {self.root}.")
        # Merge per-exposure records, keyed by dimension name, into one batch.
        records = {"visit": [], "exposure": [], "visit_detector_region": []}
        for exposure in self._exposureData:
            for dimension, recordsForDimension in exposure.records.items():
                records[dimension].extend(recordsForDimension)
        self.task.raws.insertDimensionData(records)

    def iterDatasets(self) -> Iterator[Tuple[str, DatasetRef]]:
        # Docstring inherited from RepoConverter.
        # Iterate over reference catalog files.
        for refCat, dimension in self._refCats:
            datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                      storageClass="SimpleCatalog")
            if self.subset is None:
                # No spatial subset: scan the directory for shard files named
                # <htmId>.fits.
                regex = re.compile(r"(\d+)\.fits")
                for fileName in os.listdir(os.path.join(self.root, "ref_cats", refCat)):
                    m = regex.match(fileName)
                    if m is not None:
                        htmId = int(m.group(1))
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield os.path.join("ref_cats", refCat, fileName), DatasetRef(datasetType, dataId)
            else:
                # Spatial subset: construct the expected shard file names
                # directly from the subset's skypix IDs.
                for htmId in self.subset.skypix[dimension]:
                    dataId = self.task.registry.expandDataId({dimension: htmId})
                    yield os.path.join("ref_cats", refCat, f"{htmId}.fits"), DatasetRef(datasetType, dataId)
        yield from super().iterDatasets()

    def ingest(self):
        # Docstring inherited from RepoConverter.
        if self.task.raws is not None:
            self.task.log.info(f"Ingesting raws from root {self.root}.")
            self.task.registry.registerDatasetType(self.task.raws.datasetType)
            # We need to delegate to RawIngestTask to actually ingest raws,
            # rather than just including those datasets in iterDatasets for
            # the base class to handle, because we don't want to assume we
            # can use the Datastore-configured Formatter for raw data.
            refs = []
            # NOTE(review): `butler` is never used below — presumably
            # getButler() is called for its side effects (e.g. registering
            # the "raw" collections); confirm before simplifying.
            butler, collections = self.getButler("raw")
            for exposure in self._exposureData:
                refs.extend(self.task.raws.ingestExposureDatasets(exposure))
            # Associate all ingested raw refs with each output collection.
            for collection in collections:
                self.task.registry.associate(collection, refs)
        super().ingest()