Coverage for tests / test_registry_dataset_type_overrides.py: 15%

101 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-06 08:30 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import os 

29import unittest 

30 

31from lsst.daf.butler import Butler, Config, DatasetRef, DatasetType, MissingDatasetTypeError 

32from lsst.daf.butler.registry import ConflictingDefinitionError 

33from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel 

34from lsst.daf.butler.tests.utils import safeTestTempDir 

35 

# Absolute path of the directory that contains this test module.
TESTDIR = os.path.dirname(os.path.abspath(__file__))

37 

38 

class RegistryDatasetTypeOverridesTestCase(unittest.TestCase):
    """Exercise registry-configuration overrides that rename dataset types or
    replace their storage classes.

    ``setUp`` creates a fresh repository and a writeable "base" butler for it.
    Each test then opens a second butler on the same repository from a
    configuration that carries dataset-manager overrides, and compares what
    the two clients see.
    """

    def setUp(self) -> None:
        model_type_name = "lsst.daf.butler.tests.dict_convertible_model.DictConvertibleModel"
        yaml_formatter = "lsst.daf.butler.formatters.yaml.YamlFormatter"
        json_formatter = "lsst.daf.butler.formatters.json.JsonFormatter"
        # BuiltinDict and DictModel have different Python types but declare
        # converters in both directions; BuiltinList declares no converters,
        # so it is not convertible to either of the other two.
        storage_classes = {
            "BuiltinDict": {
                "pytype": "dict",
                "converters": {model_type_name: f"{model_type_name}.to_dict"},
            },
            "DictModel": {
                "pytype": model_type_name,
                "converters": {"dict": f"{model_type_name}.from_dict"},
            },
            "BuiltinList": {"pytype": "list"},
        }
        # The dict storage class is written as YAML and the other two as
        # JSON, which lets tests identify the formatter from the extension.
        formatters = {
            "BuiltinDict": yaml_formatter,
            "DictModel": json_formatter,
            "BuiltinList": json_formatter,
        }
        config = Config()
        config["storageClasses"] = storage_classes
        config["datastore"] = {"formatters": formatters}
        repo_dir = self.enterContext(safeTestTempDir(TESTDIR))
        self.base_config = Butler.makeRepo(root=repo_dir, config=config)
        self.base_butler = self.enterContext(Butler.from_config(self.base_config, writeable=True))

    def _make_dataset_type(self, name: str, dimensions, storage_class: str):
        """Build a `DatasetType` in the base butler's dimension universe."""
        return DatasetType(
            name,
            dimensions=dimensions,
            storageClass=storage_class,
            universe=self.base_butler.dimensions,
        )

    def _install_dataset_overrides(self, overrides: dict[str, dict[str, str]]) -> None:
        """Set ``self.override_config`` to a copy of the base configuration
        whose datasets manager entry carries the given ``overrides``.

        The manager class is taken unchanged from the base configuration.
        """
        manager_key = ".registry.managers.datasets"
        patched = self.base_config.copy()
        patched[manager_key] = {
            "cls": self.base_config[manager_key],
            "config": {"overrides": overrides},
        }
        self.override_config = patched

    def test_rename(self) -> None:
        """Swap names between two dataset types through config overrides.

        This mimics the case where an old dataset type is squatting on a name
        we want to use for a new one.
        """
        self.setup_rename()
        base_butler = self.base_butler
        for dataset_type in (self.legacy_dst, self.future_dst):
            base_butler.registry.registerDatasetType(dataset_type)

        with Butler.from_config(self.override_config) as override_butler:
            # The overriding client sees the renamed definitions, and the
            # pre-rename name of the new dataset type is reported missing.
            self.assertEqual(override_butler.get_dataset_type("legacy_dst"), self.legacy_dst_override)
            self.assertEqual(override_butler.get_dataset_type("dst"), self.future_dst_override)
            with self.assertRaises(MissingDatasetTypeError):
                override_butler.get_dataset_type("future_dst")
            # The base client keeps seeing the original definitions.
            self.assertEqual(base_butler.get_dataset_type("dst"), self.legacy_dst)
            self.assertEqual(base_butler.get_dataset_type("future_dst"), self.future_dst)
            with self.assertRaises(MissingDatasetTypeError):
                base_butler.get_dataset_type("legacy_dst")

        # Query dataset types through a brand-new client so that different
        # caching/fetching paths are hit.
        with Butler.from_config(self.override_config) as override_butler:
            queried_types = override_butler.registry.queryDatasetTypes("*")
            self.assertCountEqual(queried_types, [self.legacy_dst_override, self.future_dst_override])

        # Write through one client and read/query through the other.
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            legacy_ref = base_butler.put({"one": 1}, "dst", run="run1")
            future_ref_override = override_butler.put([2], "dst", instrument="Cam1", run="run1")
            base_butler.registry.refresh()
            override_butler.registry.refresh()

            future_payload = base_butler.get("future_dst", instrument="Cam1", collections=["run1"])
            self.assertEqual(future_payload, [2])
            legacy_payload = override_butler.get("legacy_dst", collections=["run1"])
            self.assertEqual(legacy_payload, {"one": 1})

            future_uri = base_butler.getURI("future_dst", instrument="Cam1", collections=["run1"])
            self.assertEqual(future_uri.getExtension(), ".json")
            legacy_uri = override_butler.getURI("legacy_dst", collections=["run1"])
            self.assertEqual(legacy_uri.getExtension(), ".yaml")

            future_refs = base_butler.query_datasets("future_dst", collections=["run1"])
            self.assertEqual(
                future_refs,
                [DatasetRef(self.future_dst, future_ref_override.dataId, "run1", id=future_ref_override.id)],
            )
            legacy_refs = override_butler.query_datasets("legacy_dst", collections=["run1"])
            self.assertEqual(
                legacy_refs,
                [DatasetRef(self.legacy_dst_override, legacy_ref.dataId, "run1", id=legacy_ref.id)],
            )

    def test_rename_registration(self) -> None:
        """Check dataset type registration when the dataset type name has
        been configured to be renamed.
        """
        self.setup_rename()
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            # The original name of a renamed dataset type appears missing,
            # but registering it must still be rejected; this is the only
            # way in which an override rename should differ from a real
            # DB-level rename.
            with self.assertRaises(ConflictingDefinitionError):
                override_butler.registry.registerDatasetType(self.future_dst)
            # Registering the renamed definition through the override
            # client registers the original dataset type in the base repo.
            override_butler.registry.registerDatasetType(self.future_dst_override)
            self.base_butler.registry.refresh()
            registered = self.base_butler.get_dataset_type("future_dst")
            self.assertEqual(registered, self.future_dst)

    def setup_rename(self) -> None:
        """Prepare the extra state used by the rename-override tests.

        Defines the two original dataset types and their renamed
        counterparts, inserts the ``Cam1`` instrument record, registers the
        ``run1`` collection, and builds ``self.override_config``.
        """
        self.legacy_dst = self._make_dataset_type("dst", (), "BuiltinDict")
        self.future_dst = self._make_dataset_type("future_dst", {"instrument"}, "BuiltinList")
        self.base_butler.registry.insertDimensionData("instrument", {"name": "Cam1"})
        self.base_butler.collections.register("run1")
        # The overrides swap names: the old "dst" becomes "legacy_dst" and
        # "future_dst" takes over the "dst" name.
        self._install_dataset_overrides(
            {
                "dst": {"rename": "legacy_dst"},
                "future_dst": {"rename": "dst"},
            }
        )
        self.legacy_dst_override = self._make_dataset_type("legacy_dst", (), "BuiltinDict")
        self.future_dst_override = self._make_dataset_type("dst", {"instrument"}, "BuiltinList")

    def test_storage_class_override_config(self) -> None:
        """Check that a repository configuration override can change the
        storage class of a dataset type.
        """
        self.setup_storage_class()
        base_butler = self.base_butler
        base_butler.registry.registerDatasetType(self.base_dst)
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            self.assertEqual(base_butler.get_dataset_type("dst"), self.base_dst)
            self.assertEqual(override_butler.get_dataset_type("dst"), self.override_dst)

            # Each client writes an object of its own natural storage class.
            obj1 = {"one": "1"}
            obj2 = DictConvertibleModel(content={"two": "2"}, extra="three")
            ref1 = base_butler.put(obj1, "dst", run="run1")
            ref2 = override_butler.put(obj2, "dst", run="run2")
            base_butler.registry.refresh()
            override_butler.registry.refresh()

            # A 'get' by ref works with either client, since that's just a
            # call-level dataset type override that makes the datastore
            # 'get' match the storage class used for the write.
            for butler in (base_butler, override_butler):
                for ref, written in ((ref1, obj1), (ref2, obj2)):
                    self.assertEqual(butler.get(ref), written)

            # The file extensions reveal which formatter wrote each dataset.
            for ref, extension in ((ref1, ".yaml"), (ref2, ".json")):
                self.assertEqual(base_butler.getURI(ref).getExtension(), extension)

            # A 'get' by dataset type name with the client that did the
            # write needs no conversion.
            self.assertEqual(base_butler.get("dst", collections=["run1"]), obj1)
            self.assertEqual(override_butler.get("dst", collections=["run2"]), obj2)

            # A 'get' by dataset type name with the other client should do
            # a storage class conversion.
            converted_to_dict = base_butler.get("dst", collections=["run2"])
            self.assertEqual(converted_to_dict, obj2.to_dict())
            converted_to_model = override_butler.get("dst", collections=["run1"])
            self.assertEqual(converted_to_model, DictConvertibleModel.from_dict(obj1))

            # Call-level storage class overrides that differ from the
            # client's own storage class.
            as_model = base_butler.get("dst", collections=["run1"], storageClass="DictModel")
            self.assertEqual(as_model, DictConvertibleModel.from_dict(obj1))
            as_dict = override_butler.get("dst", collections=["run2"], storageClass="BuiltinDict")
            self.assertEqual(as_dict, obj2.to_dict())

            # Refs returned by dataset queries carry the storage class of
            # the client that ran the query.
            base_refs = base_butler.query_datasets("dst", collections=["run1", "run2"], find_first=False)
            self.assertCountEqual(base_refs, [ref1, ref2.overrideStorageClass("BuiltinDict")])
            override_refs = override_butler.query_datasets(
                "dst", collections=["run1", "run2"], find_first=False
            )
            self.assertCountEqual(override_refs, [ref1.overrideStorageClass("DictModel"), ref2])

    def test_storage_class_override_config_registration(self) -> None:
        """Check registration of dataset types whose storage classes have
        been overridden via repository configuration.
        """
        self.setup_storage_class()
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            override_registry = override_butler.registry
            # Registering a dataset type that has a storage class override
            # before it exists is an error, because we don't know what its
            # storage class should really be in the database.
            with self.assertRaises(ConflictingDefinitionError):
                override_registry.registerDatasetType(self.override_dst)
            # Registering in the base repository is of course fine.
            self.base_butler.registry.registerDatasetType(self.base_dst)
            # Re-registering with the new storage class through the
            # override client should be a no-op, because to that client
            # the dataset type already exists with that storage class.
            reregistered = override_registry.registerDatasetType(self.override_dst)
            self.assertFalse(reregistered)
            # Re-registering with the base repo's storage class through
            # the override client should be an error, because to that
            # client it looks like a different storage class.
            with self.assertRaises(ConflictingDefinitionError):
                override_registry.registerDatasetType(self.base_dst)

    def setup_storage_class(self) -> None:
        """Prepare the extra state used by the storage-class-override tests.

        Defines the base dataset type and its storage-class-overridden
        variant, registers the ``run1`` and ``run2`` collections, and builds
        ``self.override_config``.
        """
        self.base_dst = self._make_dataset_type("dst", (), "BuiltinDict")
        self.override_dst = self.base_dst.overrideStorageClass("DictModel")
        for run in ("run1", "run2"):
            self.base_butler.collections.register(run)
        self._install_dataset_overrides({"dst": {"storageClass": "DictModel"}})

270 

271 

if __name__ == "__main__":
    # Run this module's tests when the file is executed as a script.
    unittest.main()