Coverage for tests/test_server.py: 18%

223 statements  

coverage.py v7.4.1, created at 2024-02-13 10:56 +0000
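This report is of the kind produced by coverage.py's HTML writer (for example via "coverage run -m pytest tests/test_server.py" followed by "coverage html"; the exact invocation used for this run is not recorded here). The 18% of 223 statements covered corresponds roughly to the module-level import and definition lines, which is consistent with the tests themselves having been skipped, e.g. when FastAPI is not installed (see the skipIf decorator below).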

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
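
"""Tests for the Butler client/server: a RemoteButler exercising the Butler
server app through an in-process FastAPI TestClient.
"""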

import os.path
import unittest
import uuid

from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel

try:
    # Failing to import any of these should disable the tests.
    import safir.dependencies.logger
    from fastapi import HTTPException
    from fastapi.testclient import TestClient
    from lsst.daf.butler.remote_butler import RemoteButler, RemoteButlerFactory
    from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY
    from lsst.daf.butler.remote_butler.server import create_app
    from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency
    from lsst.daf.butler.tests.server_utils import add_auth_header_check_middleware
    from lsst.resources.s3utils import clean_test_environment_for_s3, getS3Client

    try:
        from moto import mock_aws  # v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    TestClient = None
    create_app = None

from unittest.mock import NonCallableMock, patch

from lsst.daf.butler import (
    Butler,
    DataCoordinate,
    DatasetRef,
    LabeledButlerFactory,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    StorageClassFactory,
)
from lsst.daf.butler._butler_instance_options import ButlerInstanceOptions
from lsst.daf.butler.datastore import DatasetRefURIs
from lsst.daf.butler.tests import DatastoreMock, addDatasetType
from lsst.daf.butler.tests.utils import (
    MetricsExample,
    MetricTestRepo,
    makeTestTempDir,
    mock_env,
    removeTestTempDir,
)
from lsst.resources import ResourcePath
from lsst.resources.http import HttpResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))

TEST_REPOSITORY_NAME = "testrepo"
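
# The two helpers below build the client/server pair used throughout these
# tests: an in-process FastAPI TestClient standing in for the HTTP layer, and
# a RemoteButler that sends its requests through that client. setUpClass wires
# them together roughly like this (sketch):
#
#     client = _make_test_client(app)
#     butler = _make_remote_butler(client, collections="ingest/run")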

def _make_test_client(app, raise_server_exceptions=True):
    client = TestClient(app, raise_server_exceptions=raise_server_exceptions)
    return client


def _make_remote_butler(http_client, *, collections: str | None = None):
    options = None
    if collections is not None:
        options = ButlerInstanceOptions(collections=collections)
    factory = RemoteButlerFactory(f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}", http_client)
    return factory.create_butler_for_access_token("fake-access-token", butler_options=options)


@unittest.skipIf(TestClient is None or create_app is None, "FastAPI not installed.")
class ButlerClientServerTestCase(unittest.TestCase):
    """Test for Butler client/server."""

    @classmethod
    def setUpClass(cls):
        # Set up a mock S3 environment using Moto. Moto also monkeypatches the
        # `requests` library so that any HTTP requests to presigned S3 URLs get
        # redirected to the mocked S3.
        # Note that all files are stored in memory.
        cls.enterClassContext(clean_test_environment_for_s3())
        cls.enterClassContext(mock_aws())

        # matches server.yaml
        for bucket in ["mutable-bucket", "immutable-bucket"]:
            getS3Client().create_bucket(Bucket=bucket)

        cls.storageClassFactory = StorageClassFactory()

        # First create a butler and populate it.
        cls.root = makeTestTempDir(TESTDIR)
        cls.repo = MetricTestRepo(
            root=cls.root,
            configFile=os.path.join(TESTDIR, "config/basic/server.yaml"),
            forceConfigRoot=False,
        )
        # Add a file with corrupted data for testing error conditions
        cls.dataset_with_corrupted_data = _create_corrupted_dataset(cls.repo)
        # All of the datasets that come with MetricTestRepo are disassembled
        # composites. Add a simple dataset for testing the common case.
        cls.simple_dataset_ref = _create_simple_dataset(cls.repo.butler)

        # Override the server's Butler initialization to point at our test repo
        server_butler_factory = LabeledButlerFactory({TEST_REPOSITORY_NAME: cls.root})

        app = create_app()
        app.dependency_overrides[butler_factory_dependency] = lambda: server_butler_factory
        add_auth_header_check_middleware(app)

        # Set up the RemoteButler that will connect to the server
        cls.client = _make_test_client(app)
        cls.butler = _make_remote_butler(cls.client)
        cls.butler_with_default_collection = _make_remote_butler(cls.client, collections="ingest/run")
        # By default, the TestClient instance raises any unhandled exceptions
        # from the server as if they had originated in the client to ease
        # debugging. However, this can make it appear that error propagation
        # is working correctly when in a real deployment the server exception
        # would cause a 500 Internal Server Error. This instance of the butler
        # is set up so that any unhandled server exceptions do return a 500
        # status code.
        cls.butler_without_error_propagation = _make_remote_butler(
            _make_test_client(app, raise_server_exceptions=False)
        )

        # Populate the test server.
        # The DatastoreMock is required because the datasets referenced in
        # these imports do not point at real files.
        DatastoreMock.apply(cls.repo.butler)
        cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

    @classmethod
    def tearDownClass(cls):
        removeTestTempDir(cls.root)

    def test_health_check(self):
        response = self.client.get("/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["name"], "butler")

    def test_dimension_universe(self):
        universe = self.butler.dimensions
        self.assertEqual(universe.namespace, "daf_butler")

    def test_get_dataset_type(self):
        bias_type = self.butler.get_dataset_type("bias")
        self.assertEqual(bias_type.name, "bias")

        with self.assertRaises(MissingDatasetTypeError):
            self.butler_without_error_propagation.get_dataset_type("not_bias")

    def test_find_dataset(self):
        storage_class = self.storageClassFactory.getStorageClass("Exposure")

        ref = self.butler.find_dataset("bias", collections="imported_g", detector=1, instrument="Cam1")
        self.assertIsInstance(ref, DatasetRef)
        self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
        self.assertFalse(ref.dataId.hasRecords())

        # Try again with a variation of the parameters.
        ref_new = self.butler.find_dataset(
            "bias",
            {"detector": 1},
            collections="imported_g",
            instrument="Cam1",
            dimension_records=True,
        )
        self.assertEqual(ref_new, ref)
        self.assertTrue(ref_new.dataId.hasRecords())

        ref_new = self.butler.find_dataset(
            ref.datasetType,
            DataCoordinate.standardize(detector=1, instrument="Cam1", universe=self.butler.dimensions),
            collections="imported_g",
            storage_class=storage_class,
        )
        self.assertEqual(ref_new, ref)

        ref2 = self.butler.get_dataset(ref.id)
        self.assertEqual(ref2, ref)

        # Use the detector name to find it.
        ref3 = self.butler.find_dataset(
            ref.datasetType,
            collections="imported_g",
            instrument="Cam1",
            full_name="Aa",
        )
        self.assertEqual(ref2, ref3)

        # Try expanded refs.
        self.assertFalse(ref.dataId.hasRecords())
        expanded = self.butler.get_dataset(ref.id, dimension_records=True)
        self.assertTrue(expanded.dataId.hasRecords())

        # The test datasets are all Exposure, so storage class conversion
        # cannot be tested until we fix that. For now at least exercise the
        # code paths.
        bias = self.butler.get_dataset(ref.id, storage_class=storage_class)
        self.assertEqual(bias.datasetType.storageClass, storage_class)

        # An unknown dataset should not fail.
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4()))
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4(), storage_class="NumpyArray"))

    def test_instantiate_via_butler_http_search(self):
        """Ensure that the primary Butler constructor's automatic search logic
        correctly locates and reads the configuration file, and ends up with a
        RemoteButler pointing at the correct URL.
        """

        # This is kind of a fragile test. Butler's search logic does a lot of
        # manipulations involving creating new ResourcePaths, and ResourcePath
        # doesn't use httpx, so we can't easily inject the TestClient in there.
        # We don't have an actual valid HTTP URL to give to the constructor
        # because the test instance of the server is accessed via ASGI.
        #
        # Instead we just monkeypatch the HttpResourcePath `read` method and
        # hope that all ResourcePath HTTP reads during construction are going
        # to the server under test.
        def override_read(http_resource_path):
            return self.client.get(http_resource_path.geturl()).content

        server_url = f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}/"

        with patch.object(HttpResourcePath, "read", override_read):
            # Add the access token to the environment. RemoteButler
            # instantiation will throw an error if the access token is not
            # available.
            with mock_env({_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "fake-access-token"}):
                butler = Butler(
                    server_url,
                    collections=["collection1", "collection2"],
                    run="collection2",
                )
                butler_factory = LabeledButlerFactory({"server": server_url})
                factory_created_butler = butler_factory.create_butler(label="server", access_token="token")
                self.assertIsInstance(butler, RemoteButler)
                self.assertIsInstance(factory_created_butler, RemoteButler)
                self.assertEqual(butler._server_url, server_url)
                self.assertEqual(factory_created_butler._server_url, server_url)

                self.assertEqual(butler.collections, ("collection1", "collection2"))
                self.assertEqual(butler.run, "collection2")

    def test_get(self):
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"
        # Test get() of a DatasetRef.
        ref = self.butler.find_dataset(dataset_type, data_id, collections=collections)
        metric = self.butler.get(ref)
        self.assertIsInstance(metric, MetricsExample)
        self.assertEqual(metric.summary, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test get() by DataId.
        data_id_metric = self.butler.get(dataset_type, dataId=data_id, collections=collections)
        self.assertEqual(metric, data_id_metric)
        # Test get() by DataId dict augmented with kwargs.
        kwarg_metric = self.butler.get(
            dataset_type, dataId={"instrument": "DummyCamComp"}, collections=collections, visit=423
        )
        self.assertEqual(metric, kwarg_metric)
        # Test get() by DataId DataCoordinate augmented with kwargs.
        coordinate = DataCoordinate.make_empty(self.butler.dimensions)
        kwarg_data_coordinate_metric = self.butler.get(
            dataset_type, dataId=coordinate, collections=collections, instrument="DummyCamComp", visit=423
        )
        self.assertEqual(metric, kwarg_data_coordinate_metric)
        # Test get() of a non-existent DataId.
        invalid_data_id = {"instrument": "NotAValidlInstrument", "visit": 423}
        with self.assertRaises(LookupError):
            self.butler_without_error_propagation.get(
                dataset_type, dataId=invalid_data_id, collections=collections
            )

        # Test get() by DataId with default collections.
        default_collection_metric = self.butler_with_default_collection.get(dataset_type, dataId=data_id)
        self.assertEqual(metric, default_collection_metric)

        # Test get() by DataId with no collections specified.
        with self.assertRaises(NoDefaultCollectionError):
            self.butler_without_error_propagation.get(dataset_type, dataId=data_id)

        # Test looking up a non-existent ref
        invalid_ref = ref.replace(id=uuid.uuid4())
        with self.assertRaises(LookupError):
            self.butler_without_error_propagation.get(invalid_ref)

        with self.assertRaises(RuntimeError):
            self.butler_without_error_propagation.get(self.dataset_with_corrupted_data)

        # Test storage class override
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")

        def check_sc_override(converted):
            self.assertNotEqual(type(metric), type(converted))
            self.assertIsInstance(converted, new_sc.pytype)
            self.assertEqual(metric, converted)

        check_sc_override(self.butler.get(ref, storageClass=new_sc))

        # Test storage class override via DatasetRef.
        check_sc_override(self.butler.get(ref.overrideStorageClass("MetricsConversion")))
        # Test storage class override via DatasetType.
        check_sc_override(
            self.butler.get(
                ref.datasetType.overrideStorageClass(new_sc), dataId=data_id, collections=collections
            )
        )

        # Test component override via DatasetRef.
        component_ref = ref.makeComponentRef("summary")
        component_data = self.butler.get(component_ref)
        self.assertEqual(component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test overriding both storage class and component via DatasetRef.
        converted_component_data = self.butler.get(component_ref, storageClass="DictConvertibleModel")
        self.assertIsInstance(converted_component_data, DictConvertibleModel)
        self.assertEqual(converted_component_data.content, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test component override via DatasetType.
        dataset_type_component_data = self.butler.get(
            component_ref.datasetType, component_ref.dataId, collections=collections
        )
        self.assertEqual(dataset_type_component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

    def test_getURIs_no_components(self):
        # This dataset does not have components, and should return one URI.
        def check_uri(uri: ResourcePath):
            self.assertIsNotNone(uri)
            self.assertEqual(uri.scheme, "https")
            self.assertEqual(uri.read(), b"123")

        uris = self.butler.getURIs(self.simple_dataset_ref)
        self.assertEqual(len(uris.componentURIs), 0)
        check_uri(uris.primaryURI)

        check_uri(self.butler.getURI(self.simple_dataset_ref))

    def test_getURIs_multiple_components(self):
        # This dataset has multiple components, so we should get back multiple
        # URIs.
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"

        def check_uris(uris: DatasetRefURIs):
            self.assertIsNone(uris.primaryURI)
            self.assertEqual(len(uris.componentURIs), 3)
            path = uris.componentURIs["summary"]
            self.assertEqual(path.scheme, "https")
            data = path.read()
            self.assertEqual(data, b"AM1: 5.2\nAM2: 30.6\n")

        uris = self.butler.getURIs(dataset_type, dataId=data_id, collections=collections)
        check_uris(uris)

        # Calling getURI on a multi-file dataset raises an exception
        with self.assertRaises(RuntimeError):
            self.butler.getURI(dataset_type, dataId=data_id, collections=collections)

        # getURIs does NOT respect component overrides on the DatasetRef,
        # instead returning the parent's URIs. Unclear if this is "correct"
        # from a conceptual point of view, but this matches DirectButler
        # behavior.
        ref = self.butler.find_dataset(dataset_type, data_id=data_id, collections=collections)
        componentRef = ref.makeComponentRef("summary")
        componentUris = self.butler.getURIs(componentRef)
        check_uris(componentUris)

    def test_auth_check(self):
        # This is checking that the unit-test middleware for validating the
        # authentication headers is working. It doesn't test actual server
        # functionality -- in a real deployment, the authentication headers are
        # handled by GafaelfawrIngress, not our app.
        with self.assertRaises(HTTPException) as cm:
            self.client.get("/v1/dataset_type/int")
        self.assertEqual(cm.exception.status_code, 401)

    def test_exception_logging(self):
        app = create_app()

        def raise_error():
            raise RuntimeError("An unhandled error")

        app.dependency_overrides[butler_factory_dependency] = raise_error
        client = _make_test_client(app, raise_server_exceptions=False)

        with patch.object(safir.dependencies.logger, "logger_dependency") as mock_logger_dep:
            # The logger dependency and its aerror method are awaited by the
            # server code, so the mocks hand back awaitables: awaiting the
            # patched dependency yields the mock logger, and awaiting aerror's
            # return value is a no-op.
            mock_logger = NonCallableMock(["aerror"])

            async def noop():
                pass

            mock_logger.aerror.return_value = noop()

            async def get_logger():
                return mock_logger

            mock_logger_dep.return_value = get_logger()
            client.get(
                "/api/butler/repo/something/v1/dataset_type/int",
                headers={"X-Auth-Request-User": "user-name", "X-Butler-Client-Request-Id": "request-id"},
            )
            mock_logger_dep.assert_called_once()

        mock_logger.aerror.assert_called_once()
        args, kwargs = mock_logger.aerror.call_args
        self.assertIsInstance(kwargs["exc_info"], RuntimeError)
        self.assertEqual(kwargs["clientRequestId"], "request-id")
        self.assertEqual(kwargs["user"], "user-name")
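

# Module-level helpers used by setUpClass to seed the test repository: the
# "corrupted" dataset has one of its stored components overwritten with
# garbage so that reading it back is expected to fail (see test_get), and the
# simple dataset provides a single-file, non-composite case.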

def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
    run = "corrupted-run"
    ref = repo.addDataset({"instrument": "DummyCamComp", "visit": 423}, run=run)
    uris = repo.butler.getURIs(ref)
    oneOfTheComponents = list(uris.componentURIs.values())[0]
    oneOfTheComponents.write("corrupted data")
    return ref


def _create_simple_dataset(butler: Butler) -> DatasetRef:
    dataset_type = addDatasetType(butler, "test_int", {"instrument", "visit"}, "int")
    ref = butler.put(123, dataset_type, dataId={"instrument": "DummyCamComp", "visit": 423})
    return ref


if __name__ == "__main__":
    unittest.main()