Coverage for tests / test_registry_dataset_type_overrides.py: 15%
101 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 08:30 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28import os
29import unittest
31from lsst.daf.butler import Butler, Config, DatasetRef, DatasetType, MissingDatasetTypeError
32from lsst.daf.butler.registry import ConflictingDefinitionError
33from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
34from lsst.daf.butler.tests.utils import safeTestTempDir
# Absolute path of the directory holding this test module; temporary test
# repositories are created underneath it.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class RegistryDatasetTypeOverridesTestCase(unittest.TestCase):
    """Tests for overriding dataset type names and storage classes in registry
    configuration.

    Each test builds a "base" butler repo and an "override" configuration for
    the same repo whose registry dataset-type manager carries name or storage
    class overrides, then checks that the two clients see consistent but
    appropriately translated views of the same underlying database.
    """

    def setUp(self) -> None:
        """Create a temporary repo whose storage classes and formatters are
        set up so the tests can detect which definition was used for a write.
        """
        model_type_name = "lsst.daf.butler.tests.dict_convertible_model.DictConvertibleModel"
        config = Config()
        # Configure two storage classes that are bidirectional convertible
        # but with different Python types, and a third that is not
        # convertible to either.
        config["storageClasses"] = {
            "BuiltinDict": {
                "pytype": "dict",
                "converters": {model_type_name: f"{model_type_name}.to_dict"},
            },
            "DictModel": {
                "pytype": model_type_name,
                "converters": {"dict": f"{model_type_name}.from_dict"},
            },
            "BuiltinList": {"pytype": "list"},
        }
        # Give BuiltinDict a distinct (YAML) formatter so tests can tell from
        # the file extension which storage class the datastore wrote with.
        config["datastore"] = {
            "formatters": {
                "BuiltinDict": "lsst.daf.butler.formatters.yaml.YamlFormatter",
                "DictModel": "lsst.daf.butler.formatters.json.JsonFormatter",
                "BuiltinList": "lsst.daf.butler.formatters.json.JsonFormatter",
            }
        }
        # enterContext ties cleanup of the temp dir and butler to tearDown.
        repo_dir = self.enterContext(safeTestTempDir(TESTDIR))
        self.base_config = Butler.makeRepo(root=repo_dir, config=config)
        self.base_butler = self.enterContext(Butler.from_config(self.base_config, writeable=True))

    def test_rename(self) -> None:
        """Test renaming two dataset types in config overrides, to mimic the
        case where an old dataset type is squatting on a name we want to use
        for a new one.
        """
        self.setup_rename()
        self.base_butler.registry.registerDatasetType(self.legacy_dst)
        self.base_butler.registry.registerDatasetType(self.future_dst)
        with Butler.from_config(self.override_config) as override_butler:
            # Butler with overrides sees the rename:
            self.assertEqual(override_butler.get_dataset_type("legacy_dst"), self.legacy_dst_override)
            self.assertEqual(override_butler.get_dataset_type("dst"), self.future_dst_override)
            with self.assertRaises(MissingDatasetTypeError):
                override_butler.get_dataset_type("future_dst")
            # Original butler still sees the original definitions:
            self.assertEqual(self.base_butler.get_dataset_type("dst"), self.legacy_dst)
            self.assertEqual(self.base_butler.get_dataset_type("future_dst"), self.future_dst)
            with self.assertRaises(MissingDatasetTypeError):
                self.base_butler.get_dataset_type("legacy_dst")
        # Test dataset type queries with a new butler to hit different
        # caching/fetching paths.
        with Butler.from_config(self.override_config) as override_butler:
            self.assertCountEqual(
                override_butler.registry.queryDatasetTypes("*"),
                [self.legacy_dst_override, self.future_dst_override],
            )
        # Test put/get and dataset queries across the two butlers.
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            legacy_ref = self.base_butler.put({"one": 1}, "dst", run="run1")
            future_ref_override = override_butler.put([2], "dst", instrument="Cam1", run="run1")
            # Refresh both clients so each sees the other's writes.
            self.base_butler.registry.refresh()
            override_butler.registry.refresh()
            self.assertEqual(self.base_butler.get("future_dst", instrument="Cam1", collections=["run1"]), [2])
            self.assertEqual(override_butler.get("legacy_dst", collections=["run1"]), {"one": 1})
            # Extensions reveal the formatter (and hence storage class) used
            # for each write: JSON for BuiltinList, YAML for BuiltinDict.
            self.assertEqual(
                self.base_butler.getURI("future_dst", instrument="Cam1", collections=["run1"]).getExtension(),
                ".json",
            )
            self.assertEqual(
                override_butler.getURI("legacy_dst", collections=["run1"]).getExtension(), ".yaml"
            )
            # Queries through each client should report refs carrying that
            # client's dataset type definition, with matching IDs.
            self.assertEqual(
                self.base_butler.query_datasets("future_dst", collections=["run1"]),
                [DatasetRef(self.future_dst, future_ref_override.dataId, "run1", id=future_ref_override.id)],
            )
            self.assertEqual(
                override_butler.query_datasets("legacy_dst", collections=["run1"]),
                [DatasetRef(self.legacy_dst_override, legacy_ref.dataId, "run1", id=legacy_ref.id)],
            )

    def test_rename_registration(self) -> None:
        """Test dataset type registration when the dataset type name has been
        configured to be renamed.
        """
        self.setup_rename()
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            with self.assertRaises(ConflictingDefinitionError):
                # Even though the original name of a renamed dataset type
                # appears missing, we shouldn't be able to register it (this
                # is the only way in which an override rename should differ
                # from a real DB-level rename).
                override_butler.registry.registerDatasetType(self.future_dst)
            # We can register the original dataset type in the base repo by
            # registering its rename in the override repo.
            override_butler.registry.registerDatasetType(self.future_dst_override)
            self.base_butler.registry.refresh()
            self.assertEqual(self.base_butler.get_dataset_type("future_dst"), self.future_dst)

    def setup_rename(self) -> None:
        """Do additional test setup for rename-override tests.

        Defines two dataset types ("dst" and "future_dst") in the base repo's
        terms, an override configuration that renames them ("dst" ->
        "legacy_dst", "future_dst" -> "dst"), and the dataset type objects as
        the override client should see them.
        """
        self.legacy_dst = DatasetType(
            "dst", dimensions=(), storageClass="BuiltinDict", universe=self.base_butler.dimensions
        )
        self.future_dst = DatasetType(
            "future_dst",
            dimensions={"instrument"},
            storageClass="BuiltinList",
            universe=self.base_butler.dimensions,
        )
        self.base_butler.registry.insertDimensionData("instrument", {"name": "Cam1"})
        self.base_butler.collections.register("run1")
        # Copy the base config and inject rename overrides into the datasets
        # manager configuration, keeping the original manager class.
        self.override_config = self.base_config.copy()
        self.override_config[".registry.managers.datasets"] = {
            "cls": self.base_config[".registry.managers.datasets"],
            "config": {
                "overrides": {
                    "dst": {"rename": "legacy_dst"},
                    "future_dst": {"rename": "dst"},
                }
            },
        }
        # The same dataset types as seen through the override client.
        self.legacy_dst_override = DatasetType(
            "legacy_dst", dimensions=(), storageClass="BuiltinDict", universe=self.base_butler.dimensions
        )
        self.future_dst_override = DatasetType(
            "dst",
            dimensions={"instrument"},
            storageClass="BuiltinList",
            universe=self.base_butler.dimensions,
        )

    def test_storage_class_override_config(self) -> None:
        """Test overriding the storage class of a dataset type via a repository
        configuration override.
        """
        self.setup_storage_class()
        self.base_butler.registry.registerDatasetType(self.base_dst)
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            self.assertEqual(self.base_butler.get_dataset_type("dst"), self.base_dst)
            self.assertEqual(override_butler.get_dataset_type("dst"), self.override_dst)
            # 'put' with both butlers using their natural storage classes.
            obj1 = {"one": "1"}
            obj2 = DictConvertibleModel(content={"two": "2"}, extra="three")
            ref1 = self.base_butler.put(obj1, "dst", run="run1")
            ref2 = override_butler.put(obj2, "dst", run="run2")
            self.base_butler.registry.refresh()
            override_butler.registry.refresh()
            # We can 'get' using the refs with either butler, since that's
            # just a call-level dataset type override that makes the datastore
            # 'get' match the storage class used for the write.
            self.assertEqual(self.base_butler.get(ref1), obj1)
            self.assertEqual(self.base_butler.get(ref2), obj2)
            self.assertEqual(override_butler.get(ref1), obj1)
            self.assertEqual(override_butler.get(ref2), obj2)
            # Check that we used the right formatters by looking at the
            # extensions.
            self.assertEqual(self.base_butler.getURI(ref1).getExtension(), ".yaml")
            self.assertEqual(self.base_butler.getURI(ref2).getExtension(), ".json")
            # Do a 'get' with just the dataset type name with the same butler
            # we used to do the write; no conversion necessary.
            self.assertEqual(self.base_butler.get("dst", collections=["run1"]), obj1)
            self.assertEqual(override_butler.get("dst", collections=["run2"]), obj2)
            # Do a 'get' with just the dataset type with the other butlers.
            # This should do a storage class conversion.
            self.assertEqual(self.base_butler.get("dst", collections=["run2"]), obj2.to_dict())
            self.assertEqual(
                override_butler.get("dst", collections=["run1"]), DictConvertibleModel.from_dict(obj1)
            )
            # Do a 'get' with call-level storage class overrides that differ
            # from the client's storage class.
            self.assertEqual(
                self.base_butler.get("dst", collections=["run1"], storageClass="DictModel"),
                DictConvertibleModel.from_dict(obj1),
            )
            self.assertEqual(
                override_butler.get("dst", collections=["run2"], storageClass="BuiltinDict"), obj2.to_dict()
            )
            # Query for datasets and check that the refs have the right
            # storage classes.
            self.assertCountEqual(
                self.base_butler.query_datasets("dst", collections=["run1", "run2"], find_first=False),
                [ref1, ref2.overrideStorageClass("BuiltinDict")],
            )
            self.assertCountEqual(
                override_butler.query_datasets("dst", collections=["run1", "run2"], find_first=False),
                [ref1.overrideStorageClass("DictModel"), ref2],
            )

    def test_storage_class_override_config_registration(self) -> None:
        """Test registering dataset types whose storage classes have been
        overridden via repository configuration.
        """
        self.setup_storage_class()
        with Butler.from_config(self.override_config, writeable=True) as override_butler:
            # Trying to register a dataset type with a storage class override
            # when it doesn't already exist is an error, because we don't know
            # what its storage class should really be in the database.
            with self.assertRaises(ConflictingDefinitionError):
                override_butler.registry.registerDatasetType(self.override_dst)
            # Registering in the base repository is of course fine.
            self.base_butler.registry.registerDatasetType(self.base_dst)
            # Re-registering with the new storage class in the override repo
            # should be a no-op, because it should look like it already exists
            # with that storage class.
            self.assertFalse(override_butler.registry.registerDatasetType(self.override_dst))
            # Re-registering with the base repo's storage class in the
            # override repo should be an error, because to the override
            # repo it looks like it has a different storage class.
            with self.assertRaises(ConflictingDefinitionError):
                override_butler.registry.registerDatasetType(self.base_dst)

    def setup_storage_class(self) -> None:
        """Do additional test setup for storage-class-override tests.

        Defines the dataset type "dst" with its base (BuiltinDict) and
        overridden (DictModel) storage classes, and an override configuration
        that applies the storage class override in the datasets manager.
        """
        self.base_dst = DatasetType(
            "dst", dimensions=(), storageClass="BuiltinDict", universe=self.base_butler.dimensions
        )
        self.override_dst = self.base_dst.overrideStorageClass("DictModel")
        self.base_butler.collections.register("run1")
        self.base_butler.collections.register("run2")
        # Copy the base config and inject the storage class override into the
        # datasets manager configuration, keeping the original manager class.
        self.override_config = self.base_config.copy()
        self.override_config[".registry.managers.datasets"] = {
            "cls": self.base_config[".registry.managers.datasets"],
            "config": {
                "overrides": {
                    "dst": {"storageClass": "DictModel"},
                }
            },
        }
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()