Coverage for tests/test_server.py: 16%
202 statements
« prev ^ index » next — coverage.py v7.5.0, created at 2024-04-30 02:52 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28import os.path
29import unittest
30import uuid
32from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
34try:
35 # Failing to import any of these should disable the tests.
36 import safir.dependencies.logger
37 from fastapi.testclient import TestClient
38 from lsst.daf.butler.remote_butler import RemoteButler
39 from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY
40 from lsst.daf.butler.remote_butler.server import create_app
41 from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency
42 from lsst.daf.butler.tests.server import TEST_REPOSITORY_NAME, UnhandledServerError, create_test_server
43except ImportError:
44 create_test_server = None
46from unittest.mock import NonCallableMock, patch
48from lsst.daf.butler import (
49 Butler,
50 DataCoordinate,
51 DatasetNotFoundError,
52 DatasetRef,
53 LabeledButlerFactory,
54 MissingDatasetTypeError,
55 NoDefaultCollectionError,
56 StorageClassFactory,
57)
58from lsst.daf.butler.datastore import DatasetRefURIs
59from lsst.daf.butler.registry import RegistryDefaults
60from lsst.daf.butler.tests import DatastoreMock, addDatasetType
61from lsst.daf.butler.tests.utils import MetricsExample, MetricTestRepo, mock_env
62from lsst.resources import ResourcePath
63from lsst.resources.http import HttpResourcePath
65TESTDIR = os.path.abspath(os.path.dirname(__file__))
@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
class ButlerClientServerTestCase(unittest.TestCase):
    """Test for Butler client/server."""

    @classmethod
    def setUpClass(cls):
        """Start a shared test server instance and populate it with datasets.

        The server (and its temporary repository) is kept alive for the whole
        class via ``enterClassContext``.
        """
        server_instance = cls.enterClassContext(create_test_server(TESTDIR))
        cls.client = server_instance.client
        cls.butler = server_instance.remote_butler
        cls.butler_without_error_propagation = server_instance.remote_butler_without_error_propagation

        cls.storageClassFactory = StorageClassFactory()

        cls.repo = MetricTestRepo.create_from_butler(
            server_instance.direct_butler, server_instance.config_file_path
        )
        # Add a file with corrupted data for testing error conditions
        cls.dataset_with_corrupted_data = _create_corrupted_dataset(cls.repo)
        # All of the datasets that come with MetricTestRepo are disassembled
        # composites. Add a simple dataset for testing the common case.
        cls.simple_dataset_ref = _create_simple_dataset(server_instance.direct_butler)

        # Populate the test server.
        # The DatastoreMock is required because the datasets referenced in
        # these imports do not point at real files.
        direct_butler = server_instance.direct_butler
        DatastoreMock.apply(direct_butler)
        direct_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        direct_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

    def test_health_check(self):
        """The service root endpoint responds 200 and reports the app name."""
        try:
            import importlib.metadata

            importlib.metadata.metadata("lsst.daf.butler")
        except ModuleNotFoundError:
            raise self.skipTest("Standard python package metadata not available. Butler not pip installed.")
        response = self.client.get("/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["name"], "butler")

    def test_dimension_universe(self):
        """The remote butler serves the expected dimension universe."""
        universe = self.butler.dimensions
        self.assertEqual(universe.namespace, "daf_butler")

    def test_get_dataset_type(self):
        """Dataset types can be looked up by name; unknown names raise."""
        bias_type = self.butler.get_dataset_type("bias")
        self.assertEqual(bias_type.name, "bias")

        with self.assertRaises(MissingDatasetTypeError):
            self.butler_without_error_propagation.get_dataset_type("not_bias")

    def test_find_dataset(self):
        """``find_dataset``/``get_dataset`` work with varied argument forms."""
        storage_class = self.storageClassFactory.getStorageClass("Exposure")

        ref = self.butler.find_dataset("bias", collections="imported_g", detector=1, instrument="Cam1")
        self.assertIsInstance(ref, DatasetRef)
        self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
        self.assertFalse(ref.dataId.hasRecords())

        # Try again with variation of parameters.
        ref_new = self.butler.find_dataset(
            "bias",
            {"detector": 1},
            collections="imported_g",
            instrument="Cam1",
            dimension_records=True,
        )
        self.assertEqual(ref_new, ref)
        self.assertTrue(ref_new.dataId.hasRecords())

        ref_new = self.butler.find_dataset(
            ref.datasetType,
            DataCoordinate.standardize(detector=1, instrument="Cam1", universe=self.butler.dimensions),
            collections="imported_g",
            storage_class=storage_class,
        )
        self.assertEqual(ref_new, ref)

        ref2 = self.butler.get_dataset(ref.id)
        self.assertEqual(ref2, ref)

        # Use detector name to find it.
        ref3 = self.butler.find_dataset(
            ref.datasetType,
            collections="imported_g",
            instrument="Cam1",
            full_name="Aa",
        )
        self.assertEqual(ref2, ref3)

        # Try expanded refs.
        self.assertFalse(ref.dataId.hasRecords())
        expanded = self.butler.get_dataset(ref.id, dimension_records=True)
        self.assertTrue(expanded.dataId.hasRecords())

        # The test datasets are all Exposure so storage class conversion
        # can not be tested until we fix that. For now at least test the
        # code paths.
        bias = self.butler.get_dataset(ref.id, storage_class=storage_class)
        self.assertEqual(bias.datasetType.storageClass, storage_class)

        # Unknown dataset should not fail.
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4()))
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4(), storage_class="NumpyArray"))

    def test_instantiate_via_butler_http_search(self):
        """Ensure that the primary Butler constructor's automatic search logic
        correctly locates and reads the configuration file and ends up with a
        RemoteButler pointing to the correct URL
        """

        # This is kind of a fragile test. Butler's search logic does a lot of
        # manipulations involving creating new ResourcePaths, and ResourcePath
        # doesn't use httpx so we can't easily inject the TestClient in there.
        # We don't have an actual valid HTTP URL to give to the constructor
        # because the test instance of the server is accessed via ASGI.
        #
        # Instead we just monkeypatch the HTTPResourcePath 'read' method and
        # hope that all ResourcePath HTTP reads during construction are going
        # to the server under test.
        def override_read(http_resource_path):
            return self.client.get(http_resource_path.geturl()).content

        server_url = f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}/"

        with patch.object(HttpResourcePath, "read", override_read):
            # RegistryDefaults.finish() needs to download the dimension
            # universe from the server, which will fail because there is no
            # server here. So mock it out.
            with patch.object(RegistryDefaults, "finish"):
                # Add access key to environment variables. RemoteButler
                # instantiation will throw an error if access key is not
                # available.
                with mock_env({_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "fake-access-token"}):
                    butler = Butler(
                        server_url,
                        collections=["collection1", "collection2"],
                        run="collection2",
                    )
                    self.assertIsInstance(butler, RemoteButler)
                    self.assertEqual(butler._connection.server_url, server_url)
                    self.assertEqual(butler.collections, ("collection1", "collection2"))
                    self.assertEqual(butler.run, "collection2")

                    butler_factory = LabeledButlerFactory({"server": server_url})
                    factory_created_butler = butler_factory.create_butler(label="server", access_token="token")
                    self.assertIsInstance(factory_created_butler, RemoteButler)
                    self.assertEqual(factory_created_butler._connection.server_url, server_url)

    def test_get(self):
        """``Butler.get`` works via ref, data ID, kwargs, and overrides."""
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"
        # Test get() of a DatasetRef.
        ref = self.butler.find_dataset(dataset_type, data_id, collections=collections)
        metric = self.butler.get(ref)
        self.assertIsInstance(metric, MetricsExample)
        self.assertEqual(metric.summary, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test get() by DataId.
        data_id_metric = self.butler.get(dataset_type, dataId=data_id, collections=collections)
        self.assertEqual(metric, data_id_metric)
        # Test get() by DataId dict augmented with kwargs.
        kwarg_metric = self.butler.get(
            dataset_type, dataId={"instrument": "DummyCamComp"}, collections=collections, visit=423
        )
        self.assertEqual(metric, kwarg_metric)
        # Test get() by DataId DataCoordinate augmented with kwargs.
        coordinate = DataCoordinate.make_empty(self.butler.dimensions)
        kwarg_data_coordinate_metric = self.butler.get(
            dataset_type, dataId=coordinate, collections=collections, instrument="DummyCamComp", visit=423
        )
        self.assertEqual(metric, kwarg_data_coordinate_metric)
        # Test get() of a non-existent DataId.
        invalid_data_id = {"instrument": "NotAValidlInstrument", "visit": 423}
        with self.assertRaises(DatasetNotFoundError):
            self.butler_without_error_propagation.get(
                dataset_type, dataId=invalid_data_id, collections=collections
            )

        # Test get() by DataId with default collections.
        butler_with_default_collection = self.butler._clone(collections="ingest/run")
        default_collection_metric = butler_with_default_collection.get(dataset_type, dataId=data_id)
        self.assertEqual(metric, default_collection_metric)

        # Test get() by DataId with no collections specified.
        with self.assertRaises(NoDefaultCollectionError):
            self.butler_without_error_propagation.get(dataset_type, dataId=data_id)

        # Test looking up a non-existent ref
        invalid_ref = ref.replace(id=uuid.uuid4())
        with self.assertRaises(DatasetNotFoundError):
            self.butler_without_error_propagation.get(invalid_ref)

        with self.assertRaises(RuntimeError):
            self.butler_without_error_propagation.get(self.dataset_with_corrupted_data)

        # Test storage class override
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")

        def check_sc_override(converted):
            # The converted object must be a different type but compare equal.
            self.assertNotEqual(type(metric), type(converted))
            self.assertIsInstance(converted, new_sc.pytype)
            self.assertEqual(metric, converted)

        check_sc_override(self.butler.get(ref, storageClass=new_sc))

        # Test storage class override via DatasetRef.
        check_sc_override(self.butler.get(ref.overrideStorageClass("MetricsConversion")))
        # Test storage class override via DatasetType.
        check_sc_override(
            self.butler.get(
                ref.datasetType.overrideStorageClass(new_sc), dataId=data_id, collections=collections
            )
        )

        # Test component override via DatasetRef.
        component_ref = ref.makeComponentRef("summary")
        component_data = self.butler.get(component_ref)
        self.assertEqual(component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test overriding both storage class and component via DatasetRef.
        converted_component_data = self.butler.get(component_ref, storageClass="DictConvertibleModel")
        self.assertIsInstance(converted_component_data, DictConvertibleModel)
        self.assertEqual(converted_component_data.content, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test component override via DatasetType.
        dataset_type_component_data = self.butler.get(
            component_ref.datasetType, component_ref.dataId, collections=collections
        )
        self.assertEqual(dataset_type_component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

    def test_getURIs_no_components(self):
        """``getURIs``/``getURI`` for a dataset without components."""

        # This dataset does not have components, and should return one URI.
        def check_uri(uri: ResourcePath):
            # Assert on the URI that was actually passed in.  (Previously
            # this closure asserted on the enclosing ``uris`` variable, so
            # the second call below never validated the getURI() result.)
            self.assertIsNotNone(uri)
            self.assertEqual(uri.scheme, "https")
            self.assertEqual(uri.read(), b"123")

        uris = self.butler.getURIs(self.simple_dataset_ref)
        self.assertEqual(len(uris.componentURIs), 0)
        check_uri(uris.primaryURI)

        check_uri(self.butler.getURI(self.simple_dataset_ref))

    def test_getURIs_multiple_components(self):
        """``getURIs`` returns one URI per component for composites."""
        # This dataset has multiple components, so we should get back multiple
        # URIs.
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"

        def check_uris(uris: DatasetRefURIs):
            self.assertIsNone(uris.primaryURI)
            self.assertEqual(len(uris.componentURIs), 3)
            path = uris.componentURIs["summary"]
            self.assertEqual(path.scheme, "https")
            data = path.read()
            self.assertEqual(data, b"AM1: 5.2\nAM2: 30.6\n")

        uris = self.butler.getURIs(dataset_type, dataId=data_id, collections=collections)
        check_uris(uris)

        # Calling getURI on a multi-file dataset raises an exception
        with self.assertRaises(RuntimeError):
            self.butler.getURI(dataset_type, dataId=data_id, collections=collections)

        # getURIs does NOT respect component overrides on the DatasetRef,
        # instead returning the parent's URIs. Unclear if this is "correct"
        # from a conceptual point of view, but this matches DirectButler
        # behavior.
        ref = self.butler.find_dataset(dataset_type, data_id=data_id, collections=collections)
        componentRef = ref.makeComponentRef("summary")
        componentUris = self.butler.getURIs(componentRef)
        check_uris(componentUris)

    def test_auth_check(self):
        """Requests without auth headers are rejected by the middleware."""
        # This is checking that the unit-test middleware for validating the
        # authentication headers is working. It doesn't test actual server
        # functionality -- in a real deployment, the authentication headers are
        # handled by GafaelfawrIngress, not our app.
        with self.assertRaises(UnhandledServerError) as cm:
            self.client.get("/v1/dataset_type/int")
        self.assertEqual(cm.exception.__cause__.status_code, 401)

    def test_exception_logging(self):
        """Unhandled server exceptions are logged with request context."""
        app = create_app()

        def raise_error():
            raise RuntimeError("An unhandled error")

        # Force every request to fail inside a dependency.
        app.dependency_overrides[butler_factory_dependency] = raise_error
        client = TestClient(app, raise_server_exceptions=False)

        with patch.object(safir.dependencies.logger, "logger_dependency") as mock_logger_dep:
            mock_logger = NonCallableMock(["aerror"])

            async def noop():
                pass

            # aerror is awaited by the server, so it must return an awaitable.
            mock_logger.aerror.return_value = noop()

            async def get_logger():
                return mock_logger

            mock_logger_dep.return_value = get_logger()
            client.get(
                "/api/butler/repo/something/v1/dataset_type/int",
                headers={"X-Auth-Request-User": "user-name", "X-Butler-Client-Request-Id": "request-id"},
            )
            mock_logger_dep.assert_called_once()

        mock_logger.aerror.assert_called_once()
        args, kwargs = mock_logger.aerror.call_args
        self.assertIsInstance(kwargs["exc_info"], RuntimeError)
        self.assertEqual(kwargs["clientRequestId"], "request-id")
        self.assertEqual(kwargs["user"], "user-name")
def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
    """Store a dataset in its own run, then clobber one of its component
    files with garbage so that reading it back fails.

    Returns the ref of the now-corrupted dataset.
    """
    dataset_ref = repo.addDataset({"instrument": "DummyCamComp", "visit": 423}, run="corrupted-run")
    component_uris = repo.butler.getURIs(dataset_ref).componentURIs
    # Pick an arbitrary component file and overwrite its contents.
    first_component = next(iter(component_uris.values()))
    first_component.write("corrupted data")
    return dataset_ref
def _create_simple_dataset(butler: Butler) -> DatasetRef:
    """Register a scalar ``int`` dataset type and store a single value,
    returning the ref of the newly stored dataset.
    """
    int_dataset_type = addDatasetType(butler, "test_int", {"instrument", "visit"}, "int")
    return butler.put(
        123, int_dataset_type, dataId={"instrument": "DummyCamComp", "visit": 423}, run="ingest/run"
    )
if __name__ == "__main__":
    # Allow running this test module directly: ``python test_server.py``.
    unittest.main()