Coverage for python / lsst / ap / verify / dataset.py: 36%

68 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 17:46 +0000

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24__all__ = ["Dataset"] 

25 

26import os 

27 

28import lsst.daf.butler as dafButler 

29import lsst.obs.base as obsBase 

30from lsst.utils import getPackageDir 

31 

32 

class Dataset:
    """A dataset supported by ``ap_verify``.

    Any object of this class is guaranteed to represent a ready-for-use
    ap_verify dataset, barring concurrent changes to the file system or EUPS
    operations. Constructing a Dataset does not create a compatible output
    repository(ies), which can be done by calling `makeCompatibleRepo`.

    Parameters
    ----------
    datasetId : `str`
        The name of the dataset package. A tag identifying the dataset is also
        accepted, but this usage is deprecated.

    Raises
    ------
    RuntimeError
        Raised if `datasetId` exists, but is not correctly organized or incomplete
    ValueError
        Raised if `datasetId` could not be loaded.
    """

    def __init__(self, datasetId):
        self._id = datasetId

        try:
            self._dataRootDir = getPackageDir(datasetId)
        except LookupError as e:
            error = f"Cannot find the {datasetId} package; is it set up?"
            raise ValueError(error) from e
        else:
            # Only validate the directory layout if the package was found.
            self._validatePackage()

        self._initPackage(datasetId)

    def _initPackage(self, name):
        """Prepare the package backing this ap_verify dataset.

        Parameters
        ----------
        name : `str`
            The EUPS package identifier for the desired package.
        """
        # No initialization required at present
        pass

    @property
    def datasetRoot(self):
        """The parent directory containing everything related to the
        ap_verify dataset (`str`, read-only).
        """
        return self._dataRootDir

    @property
    def rawLocation(self):
        """The directory containing the "raw" input data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'raw')

    @property
    def configLocation(self):
        """The directory containing configs that can be used to process the data (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'config')

    @property
    def pipelineLocation(self):
        """The directory containing pipelines that can be used to process the
        data in Gen 3 (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'pipelines')

    @property
    def instrument(self):
        """The Gen 3 instrument associated with this data (`lsst.obs.base.Instrument`, read-only).

        Raises
        ------
        RuntimeError
            Raised if the preloaded repository does not contain exactly
            one instrument.
        """
        butler = dafButler.Butler(self._preloadedRepo, writeable=False)
        instruments = list(butler.registry.queryDataIds('instrument'))
        if len(instruments) != 1:
            raise RuntimeError(f"Dataset does not have exactly one instrument; got {instruments}.")
        else:
            return obsBase.Instrument.fromName(instruments[0]["instrument"], butler.registry)

    @property
    def _preloadedRepo(self):
        """The directory containing the pre-ingested Gen 3 repo (`str`, read-only).
        """
        return os.path.join(self.datasetRoot, 'preloaded')

    @property
    def _preloadedExport(self):
        """The file containing an exported registry of `_preloadedRepo` (`str`, read-only).
        """
        return os.path.join(self.configLocation, 'export.yaml')

    def _validatePackage(self):
        """Confirm that the dataset directory satisfies all assumptions.

        Raises
        ------
        RuntimeError
            Raised if the package represented by this object does not conform to the
            dataset framework

        Notes
        -----
        Requires that `self._dataRootDir` has been initialized.
        """
        if not os.path.exists(self.datasetRoot):
            raise RuntimeError('Could not find dataset at ' + self.datasetRoot)
        if not os.path.exists(self.rawLocation):
            # Fixed: the original message lacked a space before "is missing".
            raise RuntimeError('Dataset at ' + self.datasetRoot + ' is missing data directory')

    def __eq__(self, other):
        """Test that two Dataset objects are equal.

        Two objects are equal iff they refer to the same ap_verify dataset.
        """
        # Returning NotImplemented (rather than raising AttributeError) lets
        # Python fall back to the other operand's comparison for foreign types.
        if not isinstance(other, Dataset):
            return NotImplemented
        return self.datasetRoot == other.datasetRoot

    def __repr__(self):
        """A string representation that can be used to reconstruct the dataset.
        """
        return f"Dataset({self._id!r})"

    def makeCompatibleRepoGen3(self, repoDir, sasquatchNamespace=None, sasquatchRestProxyUrl=None,
                               extra=None):
        """Set up a directory as a Gen 3 repository compatible with this ap_verify dataset.

        If the repository already exists, this call has no effect.

        Parameters
        ----------
        repoDir : `str`
            The directory where the output repository will be created.
        sasquatchNamespace : `str`, optional
            The namespace to which to upload analysis_tools metrics. If
            omitted, no metrics are uploaded.
        sasquatchRestProxyUrl : `str`, optional
            The server to which to upload analysis_tools metrics. Must be
            provided if ``sasquatchNamespace`` is.
        extra : `dict`, optional
            Extra parameters needed to post ap_verify metrics. Should be
            provided if ``sasquatchNamespace`` is.
        """
        # No way to tell makeRepo "create only what's missing"
        try:
            seedConfig = dafButler.Config()
            # Checksums greatly slow importing of large repositories
            seedConfig["datastore", "checksum"] = False
            transfMode = "auto"
            if sasquatchRestProxyUrl is not None:
                # Chain a Sasquatch datastore after the file datastore so
                # metric uploads happen alongside normal dataset writes.
                seedConfig[
                    "datastore", "cls"] = "lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore"
                datastores = [
                    {"cls": "lsst.daf.butler.datastores.fileDatastore.FileDatastore",
                     "root": "<butlerRoot>",
                     },
                    {"cls": "lsst.analysis.tools.interfaces.datastore.SasquatchDatastore",
                     "restProxyUrl": sasquatchRestProxyUrl,
                     "namespace": sasquatchNamespace,
                     "extra_fields": extra if extra is not None else {},
                     },
                ]
                seedConfig["datastore", "datastores"] = datastores
                transfMode = "direct"
            repoConfig = dafButler.Butler.makeRepo(repoDir, config=seedConfig)
            butler = dafButler.Butler(repoConfig, writeable=True)
            butler.import_(directory=self._preloadedRepo, filename=self._preloadedExport,
                           transfer=transfMode)
        except FileExistsError:
            # makeRepo raises FileExistsError when repoDir already holds a
            # repository; in that case leave the existing repo untouched.
            pass

205 

206 

def _isRepo(repoDir):
    """Test whether a directory has been set up as a repository.
    """
    # A repository is identified by either of these marker files.
    markers = ('_mapper', 'repositoryCfg.yaml')
    return any(os.path.exists(os.path.join(repoDir, marker)) for marker in markers)