Coverage for tests/test_server.py: 16%

202 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 02:45 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import os.path 

29import unittest 

30import uuid 

31 

32from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel 

33 

34try: 

35 # Failing to import any of these should disable the tests. 

36 import safir.dependencies.logger 

37 from fastapi.testclient import TestClient 

38 from lsst.daf.butler.remote_butler import RemoteButler 

39 from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY 

40 from lsst.daf.butler.remote_butler.server import create_app 

41 from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency 

42 from lsst.daf.butler.tests.server import TEST_REPOSITORY_NAME, UnhandledServerError, create_test_server 

43except ImportError: 

44 create_test_server = None 

45 

46from unittest.mock import NonCallableMock, patch 

47 

48from lsst.daf.butler import ( 

49 Butler, 

50 DataCoordinate, 

51 DatasetNotFoundError, 

52 DatasetRef, 

53 LabeledButlerFactory, 

54 MissingDatasetTypeError, 

55 NoDefaultCollectionError, 

56 StorageClassFactory, 

57) 

58from lsst.daf.butler.datastore import DatasetRefURIs 

59from lsst.daf.butler.registry import RegistryDefaults 

60from lsst.daf.butler.tests import DatastoreMock, addDatasetType 

61from lsst.daf.butler.tests.utils import MetricsExample, MetricTestRepo, mock_env 

62from lsst.resources import ResourcePath 

63from lsst.resources.http import HttpResourcePath 

64 

65TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

66 

67 

@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
class ButlerClientServerTestCase(unittest.TestCase):
    """Test for Butler client/server.

    A single test-server instance is started once for the whole class in
    ``setUpClass`` and shared by all test methods, together with a
    ``RemoteButler`` client pointed at it.
    """

    @classmethod
    def setUpClass(cls):
        # enterClassContext ties the server's lifetime to this test class so
        # it is torn down automatically after the last test runs.
        server_instance = cls.enterClassContext(create_test_server(TESTDIR))
        # HTTP test client talking directly to the ASGI app.
        cls.client = server_instance.client
        # RemoteButler client configured against the test server.
        cls.butler = server_instance.remote_butler
        # Variant client used when a test expects the server to return an
        # error instead of re-raising it in-process.
        cls.butler_without_error_propagation = server_instance.remote_butler_without_error_propagation

        cls.storageClassFactory = StorageClassFactory()

        cls.repo = MetricTestRepo.create_from_butler(
            server_instance.direct_butler, server_instance.config_file_path
        )
        # Add a file with corrupted data for testing error conditions
        cls.dataset_with_corrupted_data = _create_corrupted_dataset(cls.repo)
        # All of the datasets that come with MetricTestRepo are disassembled
        # composites. Add a simple dataset for testing the common case.
        cls.simple_dataset_ref = _create_simple_dataset(server_instance.direct_butler)

        # Populate the test server.
        # The DatastoreMock is required because the datasets referenced in
        # these imports do not point at real files.
        direct_butler = server_instance.direct_butler
        DatastoreMock.apply(direct_butler)
        direct_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        direct_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

    def test_health_check(self):
        """Root endpoint returns 200 and identifies the service as butler."""
        try:
            import importlib.metadata

            importlib.metadata.metadata("lsst.daf.butler")
        except ModuleNotFoundError:
            # NOTE(review): self.skipTest() itself raises SkipTest, so the
            # enclosing ``raise`` never actually executes (harmless).
            raise self.skipTest("Standard python package metadata not available. Butler not pip installed.")
        response = self.client.get("/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["name"], "butler")

    def test_dimension_universe(self):
        """Dimension universe is served with the expected namespace."""
        universe = self.butler.dimensions
        self.assertEqual(universe.namespace, "daf_butler")

    def test_get_dataset_type(self):
        """Known dataset types round-trip; unknown ones raise."""
        bias_type = self.butler.get_dataset_type("bias")
        self.assertEqual(bias_type.name, "bias")

        with self.assertRaises(MissingDatasetTypeError):
            self.butler_without_error_propagation.get_dataset_type("not_bias")

    def test_find_dataset(self):
        """find_dataset/get_dataset accept the various data-ID forms and
        options (dimension records, storage-class override, alternate keys).
        """
        storage_class = self.storageClassFactory.getStorageClass("Exposure")

        ref = self.butler.find_dataset("bias", collections="imported_g", detector=1, instrument="Cam1")
        self.assertIsInstance(ref, DatasetRef)
        self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
        self.assertFalse(ref.dataId.hasRecords())

        # Try again with variation of parameters.
        ref_new = self.butler.find_dataset(
            "bias",
            {"detector": 1},
            collections="imported_g",
            instrument="Cam1",
            dimension_records=True,
        )
        self.assertEqual(ref_new, ref)
        self.assertTrue(ref_new.dataId.hasRecords())

        ref_new = self.butler.find_dataset(
            ref.datasetType,
            DataCoordinate.standardize(detector=1, instrument="Cam1", universe=self.butler.dimensions),
            collections="imported_g",
            storage_class=storage_class,
        )
        self.assertEqual(ref_new, ref)

        ref2 = self.butler.get_dataset(ref.id)
        self.assertEqual(ref2, ref)

        # Use detector name to find it.
        ref3 = self.butler.find_dataset(
            ref.datasetType,
            collections="imported_g",
            instrument="Cam1",
            full_name="Aa",
        )
        self.assertEqual(ref2, ref3)

        # Try expanded refs.
        self.assertFalse(ref.dataId.hasRecords())
        expanded = self.butler.get_dataset(ref.id, dimension_records=True)
        self.assertTrue(expanded.dataId.hasRecords())

        # The test datasets are all Exposure so storage class conversion
        # can not be tested until we fix that. For now at least test the
        # code paths.
        bias = self.butler.get_dataset(ref.id, storage_class=storage_class)
        self.assertEqual(bias.datasetType.storageClass, storage_class)

        # Unknown dataset should not fail.
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4()))
        self.assertIsNone(self.butler.get_dataset(uuid.uuid4(), storage_class="NumpyArray"))

    def test_instantiate_via_butler_http_search(self):
        """Ensure that the primary Butler constructor's automatic search logic
        correctly locates and reads the configuration file and ends up with a
        RemoteButler pointing to the correct URL
        """

        # This is kind of a fragile test. Butler's search logic does a lot of
        # manipulations involving creating new ResourcePaths, and ResourcePath
        # doesn't use httpx so we can't easily inject the TestClient in there.
        # We don't have an actual valid HTTP URL to give to the constructor
        # because the test instance of the server is accessed via ASGI.
        #
        # Instead we just monkeypatch the HTTPResourcePath 'read' method and
        # hope that all ResourcePath HTTP reads during construction are going
        # to the server under test.
        def override_read(http_resource_path):
            # Route HTTP reads through the in-process test client.
            return self.client.get(http_resource_path.geturl()).content

        server_url = f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}/"

        with patch.object(HttpResourcePath, "read", override_read):
            # RegistryDefaults.finish() needs to download the dimension
            # universe from the server, which will fail because there is no
            # server here. So mock it out.
            with patch.object(RegistryDefaults, "finish"):
                # Add access key to environment variables. RemoteButler
                # instantiation will throw an error if access key is not
                # available.
                with mock_env({_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "fake-access-token"}):
                    butler = Butler(
                        server_url,
                        collections=["collection1", "collection2"],
                        run="collection2",
                    )
                    self.assertIsInstance(butler, RemoteButler)
                    self.assertEqual(butler._connection.server_url, server_url)
                    self.assertEqual(butler.collections, ("collection1", "collection2"))
                    self.assertEqual(butler.run, "collection2")

                    # The factory path should also produce a RemoteButler
                    # pointed at the same URL.
                    butler_factory = LabeledButlerFactory({"server": server_url})
                    factory_created_butler = butler_factory.create_butler(label="server", access_token="token")
                    self.assertIsInstance(factory_created_butler, RemoteButler)
                    self.assertEqual(factory_created_butler._connection.server_url, server_url)

    def test_get(self):
        """Exercise Butler.get() via ref, data ID, kwargs, default
        collections, error cases, and storage-class/component overrides.
        """
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"
        # Test get() of a DatasetRef.
        ref = self.butler.find_dataset(dataset_type, data_id, collections=collections)
        metric = self.butler.get(ref)
        self.assertIsInstance(metric, MetricsExample)
        self.assertEqual(metric.summary, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test get() by DataId.
        data_id_metric = self.butler.get(dataset_type, dataId=data_id, collections=collections)
        self.assertEqual(metric, data_id_metric)
        # Test get() by DataId dict augmented with kwargs.
        kwarg_metric = self.butler.get(
            dataset_type, dataId={"instrument": "DummyCamComp"}, collections=collections, visit=423
        )
        self.assertEqual(metric, kwarg_metric)
        # Test get() by DataId DataCoordinate augmented with kwargs.
        coordinate = DataCoordinate.make_empty(self.butler.dimensions)
        kwarg_data_coordinate_metric = self.butler.get(
            dataset_type, dataId=coordinate, collections=collections, instrument="DummyCamComp", visit=423
        )
        self.assertEqual(metric, kwarg_data_coordinate_metric)
        # Test get() of a non-existent DataId.
        invalid_data_id = {"instrument": "NotAValidlInstrument", "visit": 423}
        with self.assertRaises(DatasetNotFoundError):
            self.butler_without_error_propagation.get(
                dataset_type, dataId=invalid_data_id, collections=collections
            )

        # Test get() by DataId with default collections.
        butler_with_default_collection = self.butler._clone(collections="ingest/run")
        default_collection_metric = butler_with_default_collection.get(dataset_type, dataId=data_id)
        self.assertEqual(metric, default_collection_metric)

        # Test get() by DataId with no collections specified.
        with self.assertRaises(NoDefaultCollectionError):
            self.butler_without_error_propagation.get(dataset_type, dataId=data_id)

        # Test looking up a non-existent ref
        invalid_ref = ref.replace(id=uuid.uuid4())
        with self.assertRaises(DatasetNotFoundError):
            self.butler_without_error_propagation.get(invalid_ref)

        with self.assertRaises(RuntimeError):
            self.butler_without_error_propagation.get(self.dataset_with_corrupted_data)

        # Test storage class override
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")

        def check_sc_override(converted):
            # Converted object must be a different type but compare equal.
            self.assertNotEqual(type(metric), type(converted))
            self.assertIsInstance(converted, new_sc.pytype)
            self.assertEqual(metric, converted)

        check_sc_override(self.butler.get(ref, storageClass=new_sc))

        # Test storage class override via DatasetRef.
        check_sc_override(self.butler.get(ref.overrideStorageClass("MetricsConversion")))
        # Test storage class override via DatasetType.
        check_sc_override(
            self.butler.get(
                ref.datasetType.overrideStorageClass(new_sc), dataId=data_id, collections=collections
            )
        )

        # Test component override via DatasetRef.
        component_ref = ref.makeComponentRef("summary")
        component_data = self.butler.get(component_ref)
        self.assertEqual(component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test overriding both storage class and component via DatasetRef.
        converted_component_data = self.butler.get(component_ref, storageClass="DictConvertibleModel")
        self.assertIsInstance(converted_component_data, DictConvertibleModel)
        self.assertEqual(converted_component_data.content, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

        # Test component override via DatasetType.
        dataset_type_component_data = self.butler.get(
            component_ref.datasetType, component_ref.dataId, collections=collections
        )
        self.assertEqual(dataset_type_component_data, MetricTestRepo.METRICS_EXAMPLE_SUMMARY)

    def test_getURIs_no_components(self):
        """A dataset without components yields exactly one (primary) URI."""

        # This dataset does not have components, and should return one URI.
        def check_uri(uri: ResourcePath):
            # NOTE(review): the ``uri`` parameter is unused -- the closure
            # inspects the outer ``uris.primaryURI`` instead, so the second
            # call below does not actually validate getURI's return value.
            # Confirm whether that is intentional.
            self.assertIsNotNone(uris.primaryURI)
            self.assertEqual(uris.primaryURI.scheme, "https")
            self.assertEqual(uris.primaryURI.read(), b"123")

        uris = self.butler.getURIs(self.simple_dataset_ref)
        self.assertEqual(len(uris.componentURIs), 0)
        check_uri(uris.primaryURI)

        check_uri(self.butler.getURI(self.simple_dataset_ref))

    def test_getURIs_multiple_components(self):
        """A disassembled composite yields component URIs and no primary;
        getURI on such a dataset raises.
        """
        # This dataset has multiple components, so we should get back multiple
        # URIs.
        dataset_type = "test_metric_comp"
        data_id = {"instrument": "DummyCamComp", "visit": 423}
        collections = "ingest/run"

        def check_uris(uris: DatasetRefURIs):
            self.assertIsNone(uris.primaryURI)
            self.assertEqual(len(uris.componentURIs), 3)
            path = uris.componentURIs["summary"]
            self.assertEqual(path.scheme, "https")
            data = path.read()
            self.assertEqual(data, b"AM1: 5.2\nAM2: 30.6\n")

        uris = self.butler.getURIs(dataset_type, dataId=data_id, collections=collections)
        check_uris(uris)

        # Calling getURI on a multi-file dataset raises an exception
        with self.assertRaises(RuntimeError):
            self.butler.getURI(dataset_type, dataId=data_id, collections=collections)

        # getURIs does NOT respect component overrides on the DatasetRef,
        # instead returning the parent's URIs. Unclear if this is "correct"
        # from a conceptual point of view, but this matches DirectButler
        # behavior.
        ref = self.butler.find_dataset(dataset_type, data_id=data_id, collections=collections)
        componentRef = ref.makeComponentRef("summary")
        componentUris = self.butler.getURIs(componentRef)
        check_uris(componentUris)

    def test_auth_check(self):
        """Requests without auth headers are rejected with 401."""
        # This is checking that the unit-test middleware for validating the
        # authentication headers is working. It doesn't test actual server
        # functionality -- in a real deployment, the authentication headers are
        # handled by GafaelfawrIngress, not our app.
        with self.assertRaises(UnhandledServerError) as cm:
            self.client.get("/v1/dataset_type/int")
        self.assertEqual(cm.exception.__cause__.status_code, 401)

    def test_exception_logging(self):
        """Unhandled server exceptions are logged with request context
        (exception info, client request id, and user).
        """
        app = create_app()

        def raise_error():
            # Force an unhandled exception inside a dependency.
            raise RuntimeError("An unhandled error")

        app.dependency_overrides[butler_factory_dependency] = raise_error
        # raise_server_exceptions=False so the exception is handled by the
        # app's error path instead of propagating into the test.
        client = TestClient(app, raise_server_exceptions=False)

        with patch.object(safir.dependencies.logger, "logger_dependency") as mock_logger_dep:
            mock_logger = NonCallableMock(["aerror"])

            async def noop():
                pass

            # aerror is awaited by the server, so its return must be awaitable.
            mock_logger.aerror.return_value = noop()

            async def get_logger():
                return mock_logger

            mock_logger_dep.return_value = get_logger()
            client.get(
                "/api/butler/repo/something/v1/dataset_type/int",
                headers={"X-Auth-Request-User": "user-name", "X-Butler-Client-Request-Id": "request-id"},
            )
            mock_logger_dep.assert_called_once()

            mock_logger.aerror.assert_called_once()
            args, kwargs = mock_logger.aerror.call_args
            self.assertIsInstance(kwargs["exc_info"], RuntimeError)
            self.assertEqual(kwargs["clientRequestId"], "request-id")
            self.assertEqual(kwargs["user"], "user-name")

386 

387 

def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
    """Add a dataset to ``repo`` and overwrite one of its component files
    with garbage, returning the ref so tests can trigger read failures.
    """
    ref = repo.addDataset({"instrument": "DummyCamComp", "visit": 423}, run="corrupted-run")
    # Clobber an arbitrary component file so reading it back fails.
    component_uri = next(iter(repo.butler.getURIs(ref).componentURIs.values()))
    component_uri.write("corrupted data")
    return ref

395 

396 

def _create_simple_dataset(butler: Butler) -> DatasetRef:
    """Register a plain ``int`` dataset type and store one value, returning
    the resulting ref (a non-composite dataset for common-case tests).
    """
    int_dataset_type = addDatasetType(butler, "test_int", {"instrument", "visit"}, "int")
    simple_data_id = {"instrument": "DummyCamComp", "visit": 423}
    return butler.put(123, int_dataset_type, dataId=simple_data_id, run="ingest/run")

401 

402 

if __name__ == "__main__":
    # Allow running this test module directly (python test_server.py).
    unittest.main()