Coverage for tests/test_server.py: 18%
223 statements
coverage.py v7.4.1, created at 2024-02-01 11:19 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os.path
import unittest
import uuid

from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel

try:
    # Failing to import any of these should disable the tests.
    import safir.dependencies.logger
    from fastapi import HTTPException
    from fastapi.testclient import TestClient
    from lsst.daf.butler.remote_butler import RemoteButler, RemoteButlerFactory
    from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY
    from lsst.daf.butler.remote_butler.server import create_app
    from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency
    from lsst.daf.butler.tests.server_utils import add_auth_header_check_middleware
    from lsst.resources.s3utils import clean_test_environment_for_s3, getS3Client

    try:
        from moto import mock_aws  # v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    TestClient = None
    create_app = None

from unittest.mock import NonCallableMock, patch

from lsst.daf.butler import (
    Butler,
    DataCoordinate,
    DatasetRef,
    LabeledButlerFactory,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    StorageClassFactory,
)
from lsst.daf.butler._butler_instance_options import ButlerInstanceOptions
from lsst.daf.butler.datastore import DatasetRefURIs
from lsst.daf.butler.tests import DatastoreMock, addDatasetType
from lsst.daf.butler.tests.utils import (
    MetricsExample,
    MetricTestRepo,
    makeTestTempDir,
    mock_env,
    removeTestTempDir,
)
from lsst.resources import ResourcePath
from lsst.resources.http import HttpResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))

TEST_REPOSITORY_NAME = "testrepo"


def _make_test_client(app, raise_server_exceptions=True):
    client = TestClient(app, raise_server_exceptions=raise_server_exceptions)
    return client


def _make_remote_butler(http_client, *, collections: str | None = None):
    options = None
    if collections is not None:
        options = ButlerInstanceOptions(collections=collections)
    factory = RemoteButlerFactory(f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}", http_client)
    return factory.create_butler_for_access_token("fake-access-token", butler_options=options)
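
# Note: the "https://test.example" base URL above is only a placeholder.  The
# TestClient passed in as `http_client` dispatches every request directly to
# the in-process ASGI app, so this host is never contacted over the network.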

@unittest.skipIf(TestClient is None or create_app is None, "FastAPI not installed.")
class ButlerClientServerTestCase(unittest.TestCase):
    """Test for Butler client/server."""

    @classmethod
    def setUpClass(cls):
        # Set up a mock S3 environment using Moto. Moto also monkeypatches the
        # `requests` library so that any HTTP requests to presigned S3 URLs get
        # redirected to the mocked S3.
        # Note that all files are stored in memory.
        cls.enterClassContext(clean_test_environment_for_s3())
        cls.enterClassContext(mock_aws())
        bucket_name = "anybucketname"  # matches s3Datastore.yaml
        getS3Client().create_bucket(Bucket=bucket_name)

        cls.storageClassFactory = StorageClassFactory()

        # First create a butler and populate it.
        cls.root = makeTestTempDir(TESTDIR)
        cls.repo = MetricTestRepo(
            root=cls.root,
            configFile=os.path.join(TESTDIR, "config/basic/butler-s3store.yaml"),
            forceConfigRoot=False,
        )
        # Add a file with corrupted data for testing error conditions
        cls.dataset_with_corrupted_data = _create_corrupted_dataset(cls.repo)
        # All of the datasets that come with MetricTestRepo are disassembled
        # composites. Add a simple dataset for testing the common case.
        cls.simple_dataset_ref = _create_simple_dataset(cls.repo.butler)

        # Override the server's Butler initialization to point at our test repo
        server_butler_factory = LabeledButlerFactory({TEST_REPOSITORY_NAME: cls.root})

        app = create_app()
        app.dependency_overrides[butler_factory_dependency] = lambda: server_butler_factory
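        # The middleware installed below is test-only: it validates that each
        # request carries the authentication headers normally added by the
        # deployment's ingress (exercised by test_auth_check below).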
        add_auth_header_check_middleware(app)

        # Set up the RemoteButler that will connect to the server
        cls.client = _make_test_client(app)
        cls.butler = _make_remote_butler(cls.client)
        cls.butler_with_default_collection = _make_remote_butler(cls.client, collections="ingest/run")
        # By default, the TestClient instance raises any unhandled exceptions
        # from the server as if they had originated in the client to ease
        # debugging. However, this can make it appear that error propagation
        # is working correctly when in a real deployment the server exception
        # would cause a 500 Internal Server Error. This instance of the butler
        # is set up so that any unhandled server exceptions do return a 500
        # status code.
        cls.butler_without_error_propagation = _make_remote_butler(
            _make_test_client(app, raise_server_exceptions=False)
        )

        # Populate the test server.
        # The DatastoreMock is required because the datasets referenced in
        # these imports do not point at real files.
        DatastoreMock.apply(cls.repo.butler)
        cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
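        # These registry exports supply the dimension records, the `bias`
        # dataset type, and the `imported_g` collection used by the query
        # tests below (e.g. test_get_dataset_type and test_find_dataset).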

    @classmethod
    def tearDownClass(cls):
        removeTestTempDir(cls.root)

    def test_health_check(self):
        response = self.client.get("/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["name"], "butler")

    def test_dimension_universe(self):
        universe = self.butler.dimensions
        self.assertEqual(universe.namespace, "daf_butler")

    def test_get_dataset_type(self):
        bias_type = self.butler.get_dataset_type("bias")
        self.assertEqual(bias_type.name, "bias")

        with self.assertRaises(MissingDatasetTypeError):
            self.butler_without_error_propagation.get_dataset_type("not_bias")

    def test_find_dataset(self):
        storage_class = self.storageClassFactory.getStorageClass("Exposure")

        ref = self.butler.find_dataset("bias", collections="imported_g", detector=1, instrument="Cam1")
        self.assertIsInstance(ref, DatasetRef)
        self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
        self.assertFalse(ref.dataId.hasRecords())

        # Try again with a variation of the parameters.
        ref_new = self.butler.find_dataset(
            "bias",
            {"detector": 1},
            collections="imported_g",
            instrument="Cam1",
            dimension_records=True,
        )
        self.assertEqual(ref_new, ref)
        self.assertTrue(ref_new.dataId.hasRecords())

        ref_new = self.butler.find_dataset(
            ref.datasetType,
            DataCoordinate.standardize(detector=1, instrument="Cam1", universe=self.butler.dimensions),
            collections="imported_g",
            storage_class=storage_class,
        )
        self.assertEqual(ref_new, ref)

        ref2 = self.butler.get_dataset(ref.id)
        self.assertEqual(ref2, ref)

        # Use the detector name to find it.
        ref3 = self.butler.find_dataset(
            ref.datasetType,
            collections="imported_g",
            instrument="Cam1",
            full_name="Aa",
        )
        self.assertEqual(ref2, ref3)

        # Try expanded refs.
        self.assertFalse(ref.dataId.hasRecords())
        expanded = self.butler.get_dataset(ref.id, dimension_records=True)
        self.assertTrue(expanded.dataId.hasRecords())

        # The test datasets are all Exposure, so storage class conversion
        # cannot be tested until we fix that. For now, at least exercise the
        # code paths.
        bias = self.butler.get_dataset(ref.id, storage_class=storage_class)
        self.assertEqual(bias.datasetType.storageClass, storage_class)

        # An unknown dataset should not fail.
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4()))
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4(), storage_class="NumpyArray"))

    def test_instantiate_via_butler_http_search(self):
        """Ensure that the primary Butler constructor's automatic search logic
        correctly locates and reads the configuration file and ends up with a
        RemoteButler pointing to the correct URL.
        """

        # This is kind of a fragile test. Butler's search logic does a lot of
        # manipulations involving creating new ResourcePaths, and ResourcePath
        # doesn't use httpx, so we can't easily inject the TestClient in there.
        # We don't have an actual valid HTTP URL to give to the constructor
        # because the test instance of the server is accessed via ASGI.
        #
        # Instead we just monkeypatch the HttpResourcePath `read` method and
        # hope that all ResourcePath HTTP reads during construction are going
        # to the server under test.
        def override_read(http_resource_path):
            return self.client.get(http_resource_path.geturl()).content

        server_url = f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}/"

        with patch.object(HttpResourcePath, "read", override_read):
            # Add the access key to the environment variables. RemoteButler
            # instantiation will throw an error if the access key is not
            # available.
            with mock_env({_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "fake-access-token"}):
                butler = Butler(
                    server_url,
                    collections=["collection1", "collection2"],
                    run="collection2",
                )
                butler_factory = LabeledButlerFactory({"server": server_url})
                factory_created_butler = butler_factory.create_butler(label="server", access_token="token")
                self.assertIsInstance(butler, RemoteButler)
                self.assertIsInstance(factory_created_butler, RemoteButler)
                self.assertEqual(butler._server_url, server_url)
                self.assertEqual(factory_created_butler._server_url, server_url)

                self.assertEqual(butler.collections, ("collection1", "collection2"))
                self.assertEqual(butler.run, "collection2")

    def test_get(self):
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"
        # Test get() of a DatasetRef.
        ref = self.butler.find_dataset(dataset_type, data_id, collections=collections)
        metric = self.butler.get(ref)
        self.assertIsInstance(metric, MetricsExample)
        self.assertEqual(metric.summary, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test get() by DataId.
        data_id_metric = self.butler.get(dataset_type, dataId=data_id, collections=collections)
        self.assertEqual(metric, data_id_metric)
        # Test get() by DataId dict augmented with kwargs.
        kwarg_metric = self.butler.get(
            dataset_type, dataId={"instrument": "DummyCamComp"}, collections=collections, visit=423
        )
        self.assertEqual(metric, kwarg_metric)
        # Test get() by DataId DataCoordinate augmented with kwargs.
        coordinate = DataCoordinate.make_empty(self.butler.dimensions)
        kwarg_data_coordinate_metric = self.butler.get(
            dataset_type, dataId=coordinate, collections=collections, instrument="DummyCamComp", visit=423
        )
        self.assertEqual(metric, kwarg_data_coordinate_metric)
        # Test get() of a non-existent DataId.
        invalid_data_id = {"instrument": "NotAValidlInstrument", "visit": 423}
        with self.assertRaises(LookupError):
            self.butler_without_error_propagation.get(
                dataset_type, dataId=invalid_data_id, collections=collections
            )

        # Test get() by DataId with default collections.
        default_collection_metric = self.butler_with_default_collection.get(dataset_type, dataId=data_id)
        self.assertEqual(metric, default_collection_metric)

        # Test get() by DataId with no collections specified.
        with self.assertRaises(NoDefaultCollectionError):
            self.butler_without_error_propagation.get(dataset_type, dataId=data_id)

        # Test looking up a non-existent ref
        invalid_ref = ref.replace(id=uuid.uuid4())
        with self.assertRaises(LookupError):
            self.butler_without_error_propagation.get(invalid_ref)

        with self.assertRaises(RuntimeError):
            self.butler_without_error_propagation.get(self.dataset_with_corrupted_data)

        # Test storage class override
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")

        def check_sc_override(converted):
            self.assertNotEqual(type(metric), type(converted))
            self.assertIsInstance(converted, new_sc.pytype)
            self.assertEqual(metric, converted)

        check_sc_override(self.butler.get(ref, storageClass=new_sc))

        # Test storage class override via DatasetRef.
        check_sc_override(self.butler.get(ref.overrideStorageClass("MetricsConversion")))
        # Test storage class override via DatasetType.
        check_sc_override(
            self.butler.get(
                ref.datasetType.overrideStorageClass(new_sc), dataId=data_id, collections=collections
            )
        )

        # Test component override via DatasetRef.
        component_ref = ref.makeComponentRef("summary")
        component_data = self.butler.get(component_ref)
        self.assertEqual(component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test overriding both storage class and component via DatasetRef.
        converted_component_data = self.butler.get(component_ref, storageClass="DictConvertibleModel")
        self.assertIsInstance(converted_component_data, DictConvertibleModel)
        self.assertEqual(converted_component_data.content, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test component override via DatasetType.
        dataset_type_component_data = self.butler.get(
            component_ref.datasetType, component_ref.dataId, collections=collections
        )
        self.assertEqual(dataset_type_component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

    def test_getURIs_no_components(self):
        # This dataset does not have components, and should return one URI.
        def check_uri(uri: ResourcePath):
            self.assertIsNotNone(uri)
            self.assertEqual(uri.scheme, "https")
            self.assertEqual(uri.read(), b"123")
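
        # The URIs handed back by the server are presigned S3 URLs, hence the
        # https scheme; Moto's patching of the `requests` library (see
        # setUpClass) is what lets read() succeed against the mock bucket.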
        uris = self.butler.getURIs(self.simple_dataset_ref)
        self.assertEqual(len(uris.componentURIs), 0)
        check_uri(uris.primaryURI)

        check_uri(self.butler.getURI(self.simple_dataset_ref))

    def test_getURIs_multiple_components(self):
        # This dataset has multiple components, so we should get back multiple
        # URIs.
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"

        def check_uris(uris: DatasetRefURIs):
            self.assertIsNone(uris.primaryURI)
            self.assertEqual(len(uris.componentURIs), 3)
            path = uris.componentURIs["summary"]
            self.assertEqual(path.scheme, "https")
            data = path.read()
            self.assertEqual(data, b"AM1: 5.2\nAM2: 30.6\n")

        uris = self.butler.getURIs(dataset_type, dataId=data_id, collections=collections)
        check_uris(uris)

        # Calling getURI on a multi-file dataset raises an exception
        with self.assertRaises(RuntimeError):
            self.butler.getURI(dataset_type, dataId=data_id, collections=collections)

        # getURIs does NOT respect component overrides on the DatasetRef,
        # instead returning the parent's URIs. Unclear if this is "correct"
        # from a conceptual point of view, but this matches DirectButler
        # behavior.
        ref = self.butler.find_dataset(dataset_type, data_id=data_id, collections=collections)
        componentRef = ref.makeComponentRef("summary")
        componentUris = self.butler.getURIs(componentRef)
        check_uris(componentUris)

    def test_auth_check(self):
        # This is checking that the unit-test middleware for validating the
        # authentication headers is working. It doesn't test actual server
        # functionality -- in a real deployment, the authentication headers are
        # handled by GafaelfawrIngress, not our app. The middleware raises
        # HTTPException, and because self.client was created with
        # raise_server_exceptions=True (the default), that exception propagates
        # to the test instead of being converted into an error response.
        with self.assertRaises(HTTPException) as cm:
            self.client.get("/v1/dataset_type/int")
        self.assertEqual(cm.exception.status_code, 401)

    def test_exception_logging(self):
        app = create_app()

        def raise_error():
            raise RuntimeError("An unhandled error")

        # Make the Butler factory dependency blow up so that any request to the
        # server raises an unhandled exception.
        app.dependency_overrides[butler_factory_dependency] = raise_error
        client = _make_test_client(app, raise_server_exceptions=False)

        with patch.object(safir.dependencies.logger, "logger_dependency") as mock_logger_dep:
            mock_logger = NonCallableMock(["aerror"])

            async def noop():
                pass

            # aerror() is awaited by the server, so the mock must return an
            # awaitable.
            mock_logger.aerror.return_value = noop()

            async def get_logger():
                return mock_logger

            # The logger dependency is awaited to produce the logger, so hand
            # back a coroutine that resolves to the mock.
            mock_logger_dep.return_value = get_logger()
            client.get(
                "/api/butler/repo/something/v1/dataset_type/int",
                headers={"X-Auth-Request-User": "user-name", "X-Butler-Client-Request-Id": "request-id"},
            )
            mock_logger_dep.assert_called_once()

            mock_logger.aerror.assert_called_once()
            args, kwargs = mock_logger.aerror.call_args
            self.assertIsInstance(kwargs["exc_info"], RuntimeError)
            self.assertEqual(kwargs["clientRequestId"], "request-id")
            self.assertEqual(kwargs["user"], "user-name")


def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
    # Overwrite one of the component files with data that cannot be read back,
    # so that a later get() of this ref fails.
    run = "corrupted-run"
    ref = repo.addDataset({"instrument": "DummyCamComp", "visit": 423}, run=run)
    uris = repo.butler.getURIs(ref)
    oneOfTheComponents = list(uris.componentURIs.values())[0]
    oneOfTheComponents.write("corrupted data")
    return ref


def _create_simple_dataset(butler: Butler) -> DatasetRef:
    # Store a plain integer dataset; this is the payload that
    # test_getURIs_no_components reads back as b"123".
    dataset_type = addDatasetType(butler, "test_int", {"instrument", "visit"}, "int")
    ref = butler.put(123, dataset_type, dataId={"instrument": "DummyCamComp", "visit": 423})
    return ref


if __name__ == "__main__":
    unittest.main()