Coverage for python/lsst/ap/verify/dataset.py: 50%

62 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-22 11:55 +0000

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24__all__ = ["Dataset"] 

25 

26import os 

27 

28import lsst.daf.butler as dafButler 

29import lsst.obs.base as obsBase 

30from lsst.utils import getPackageDir 

31 

32 

class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepoGen3`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if `datasetId` exists, but is not correctly organized or incomplete
    ValueError
        Raised if `datasetId` could not be loaded.
    """

    def __init__(self, datasetId):
        self._id = datasetId

        try:
            # EUPS is the only supported way to locate a dataset package.
            self._dataRootDir = getPackageDir(datasetId)
        except LookupError as e:
            error = f"Cannot find the {datasetId} package; is it set up?"
            raise ValueError(error) from e
        else:
            self._validatePackage()

        self._initPackage(datasetId)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).

        Raises
        ------
        RuntimeError
            Raised if the preloaded repository does not contain exactly
            one instrument.
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform to the
            dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError(f'Could not find dataset at {self.datasetRoot}')
        if not os.path.exists(self.rawLocation):
            # Note: the original message lacked a space after the path.
            raise RuntimeError(f'Dataset at {self.datasetRoot} is missing data directory')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        # Defer to the other operand instead of raising AttributeError
        # when compared against a non-Dataset.
        if not isinstance(other, Dataset):
            return NotImplemented
        return self.datasetRoot == other.datasetRoot

    def __hash__(self):
        # Defined alongside __eq__ so Dataset objects remain hashable;
        # consistent with equality, which is keyed on the dataset root.
        return hash(self.datasetRoot)

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepoGen3(self, repoDir):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer="auto")
        except FileExistsError:
            # Repository already set up; nothing to do.
            pass

179 

180 

181def _isRepo(repoDir): 

182 """Test whether a directory has been set up as a repository. 

183 """ 

184 return os.path.exists(os.path.join(repoDir, '_mapper')) \ 

185 or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))