Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33 

# Optional AWS test dependencies. If moto cannot be imported, boto3 is set
# to None (presumably checked elsewhere to skip S3-backed tests — confirm)
# and mock_s3 degrades to a no-op class decorator so decorated test classes
# still import cleanly.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

45 

46from lsst.utils import doImport 

47from lsst.daf.butler.core.safeFileIo import safeMakeDir 

48from lsst.daf.butler import Butler, Config, ButlerConfig 

49from lsst.daf.butler import StorageClassFactory 

50from lsst.daf.butler import DatasetType, DatasetRef 

51from lsst.daf.butler import FileTemplateValidationError, ValidationError 

52from lsst.daf.butler import FileDataset 

53from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

54from lsst.daf.butler.core.location import ButlerURI 

55from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

56 unsetAwsEnvCredentials) 

57 

58from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

59 

60TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

61 

62 

def makeExampleMetrics():
    """Return a small, fixed `MetricsExample` instance to use as test data.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

69 

70 

class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

76 

77 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that supplying ``searchPaths`` overrides a value from the
        default configuration and that the override directory shows up in
        the debug log output.
        """
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logs:
            defaultConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logs.output))

        # With an explicit search path the override directory is consulted
        # and mentioned in the debug output.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logs:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logs.output))

        # The overridden value must differ from the default one.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")

96 

97 

98class ButlerPutGetTests: 

99 """Helper method for running a suite of put/get tests from different 

100 butler configurations.""" 

101 

102 root = None 

103 

104 @staticmethod 

105 def addDatasetType(datasetTypeName, dimensions, storageClass, registry): 

106 """Create a DatasetType and register it 

107 """ 

108 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

109 registry.registerDatasetType(datasetType) 

110 return datasetType 

111 

112 @classmethod 

113 def setUpClass(cls): 

114 cls.storageClassFactory = StorageClassFactory() 

115 cls.storageClassFactory.addFromConfig(cls.configFile) 

116 

117 def assertGetComponents(self, butler, datasetRef, components, reference): 

118 datasetTypeName = datasetRef.datasetType.name 

119 dataId = datasetRef.dataId 

120 for component in components: 

121 compTypeName = DatasetType.nameWithComponent(datasetTypeName, component) 

122 result = butler.get(compTypeName, dataId) 

123 self.assertEqual(result, getattr(reference, component)) 

124 

125 def tearDown(self): 

126 if self.root is not None and os.path.exists(self.root): 

127 shutil.rmtree(self.root, ignore_errors=True) 

128 

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise the put/get/remove lifecycle of a dataset with the given
        storage class and dataset type name, returning the populated
        `Butler` so subclasses can run further checks against it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest", })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

        # NOTE(review): everything below follows the ``return`` above and is
        # therefore unreachable as written. It looks like a separate test
        # method (deferred collection passing) whose ``def`` line was lost
        # when this file was extracted — confirm against the original source.
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))

334 

335 

336class ButlerTests(ButlerPutGetTests): 

337 """Tests for Butler. 

338 """ 

339 useTempRoot = True 

340 

341 def setUp(self): 

342 """Create a new butler root for each test.""" 

343 if self.useTempRoot: 

344 self.root = tempfile.mkdtemp(dir=TESTDIR) 

345 Butler.makeRepo(self.root, config=Config(self.configFile)) 

346 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

347 else: 

348 self.root = None 

349 self.tmpConfigFile = self.configFile 

350 

351 def testConstructor(self): 

352 """Independent test of constructor. 

353 """ 

354 butler = Butler(self.tmpConfigFile, run="ingest") 

355 self.assertIsInstance(butler, Butler) 

356 

357 collections = butler.registry.getAllCollections() 

358 self.assertEqual(collections, set()) 

359 

360 butler2 = Butler(butler=butler, collection="other") 

361 self.assertEqual(butler2.collection, "other") 

362 self.assertIsNone(butler2.run) 

363 self.assertIs(butler.registry, butler2.registry) 

364 self.assertIs(butler.datastore, butler2.datastore) 

365 

366 def testBasicPutGet(self): 

367 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

368 self.runPutGetTest(storageClass, "test_metric") 

369 

370 def testCompositePutGetConcrete(self): 

371 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

372 self.runPutGetTest(storageClass, "test_metric") 

373 

374 def testCompositePutGetVirtual(self): 

375 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

376 self.runPutGetTest(storageClass, "test_metric_comp") 

377 

    def testIngest(self):
        """Ingest external files as datasets, covering both one-file-per-
        dataset ingest and several datasets sharing a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: separate files were ingested so they must differ.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            # NOTE(review): detector_name is unused in this loop.
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from the same file so the URIs
        # must match.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

468 

469 def testPickle(self): 

470 """Test pickle support. 

471 """ 

472 butler = Butler(self.tmpConfigFile, run="ingest") 

473 butlerOut = pickle.loads(pickle.dumps(butler)) 

474 self.assertIsInstance(butlerOut, Butler) 

475 self.assertEqual(butlerOut._config, butler._config) 

476 self.assertEqual(butlerOut.collection, butler.collection) 

477 self.assertEqual(butlerOut.run, butler.run) 

478 

    def testGetDatasetTypes(self):
        """Register several dataset types, confirm the registry reports them
        (including per-component entries), and validate the configuration.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        # Registry must report exactly the registered types plus their
        # component entries.
        fromRegistry = butler.registry.getAllDatasetTypes()
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

525 

    def testTransaction(self):
        """Check that an exception raised inside a butler transaction rolls
        back every registry and datastore change made within it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # TransactionTestError is used (rather than a stock exception) so a
        # failure here cannot be confused with an unrelated error.
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # After the rollback, none of the above may remain visible.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

571 

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        # NOTE(review): consider self.skipTest(...) instead of a bare return
        # so the runner reports a skip rather than a silent pass.
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = butler1.registry.getAllCollections()
        self.assertEqual(collections1, set())
        self.assertEqual(butler2.registry.getAllCollections(), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

612 

613 def testStringification(self): 

614 butler = Butler(self.tmpConfigFile, run="ingest") 

615 butlerStr = str(butler) 

616 

617 if self.datastoreStr is not None: 

618 for testStr in self.datastoreStr: 

619 self.assertIn(testStr, butlerStr) 

620 if self.registryStr is not None: 

621 self.assertIn(self.registryStr, butlerStr) 

622 

623 datastoreName = butler.datastore.name 

624 if self.datastoreName is not None: 

625 for testStr in self.datastoreName: 

626 self.assertIn(testStr, datastoreName) 

627 

628 

629class FileLikeDatastoreButlerTests(ButlerTests): 

630 """Common tests and specialization of ButlerTests for butlers backed 

631 by datastores that inherit from FileLikeDatastore. 

632 """ 

633 

634 def checkFileExists(self, root, path): 

635 """Checks if file exists at a given path (relative to root). 

636 

637 Test testPutTemplates verifies actual physical existance of the files 

638 in the requested location. For POSIXDatastore this test is equivalent 

639 to `os.path.exists` call. 

640 """ 

641 return os.path.exists(os.path.join(root, path)) 

642 

    def testPutTemplates(self):
        """Check that file templates govern where datasets are written and
        that a non-unique template is rejected by validation.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second put through the non-unique template collides on disk.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

699 

    def testImportExport(self):
        """Export datasets from a populated repository and import them into
        a new, empty one, checking they are all findable afterwards.
        """
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

727 

728 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration used to build each test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key present only in the fully-expanded config (checked in testMakeRepo).
    fullConfigKey = ".datastore.formatters"
    # This configuration can fail validation (exercised in
    # testGetDatasetTypes).
    validationCanFail = True
    # Fragments expected in str(butler) and in the datastore name
    # (checked in testStringification).
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

737 

738 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No config key to check for this datastore.
    fullConfigKey = None
    # An in-memory repository does not need a temporary root on disk.
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Deliberately override the inherited ingest test with a no-op;
        # ingest is not exercised for the in-memory datastore.
        pass

751 

752 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler.

    The chain combines an in-memory datastore with two POSIX datastores
    (see ``datastoreStr``/``datastoreName`` below).
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # The formatters key is checked on the second datastore in the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Substrings expected in the stringified Butler, one per chained store.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

762 

763 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create a repository in the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the yaml file to a second directory, recording the
        # original repository location under a "root" key.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigPath)
        relocatedConfig["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        """Config and registry files must end up in the expected places."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

795 

796 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Two independent scratch areas: one for the repository root and one
        # for the externally-written config file.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Clean up the external config area before the inherited teardown.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """The external config must record the true repo root as a URI."""
        butlerConfig = Config(self.tmpConfigFile)
        actualRoot = ButlerURI(butlerConfig["root"])
        expectedRoot = ButlerURI(self.root)
        self.assertEqual(actualRoot.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", actualRoot.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        sc = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(sc, "test_metric")

825 

826 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Point outfile at a bare directory rather than a file path.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the explicit yaml file name, otherwise the Config
        # constructor cannot deduce the file type from a directory path.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

845 

846 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a fully-fledged URI instead of a plain file path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

859 

860 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Made an f-string to match datastoreStr above, so the class-level
    # placeholders are actually expanded; setUp overrides this with the real
    # repository URI anyway.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Plain string literal: the previous f-string had no placeholders (F541).
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        # Trailing slash so the result can be used directly as a key prefix.
        return rndstr + "/"

    def setUp(self):
        """Create a (mocked) bucket and a fresh repository for each test."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mocked bucket, then drop dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        # The bucket must be empty before it can be deleted; reuse the same
        # Bucket handle rather than fetching it a second time.
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        # Join root and relpath so the file itself is checked; previously
        # relpath was ignored and only the root location was tested.
        uri = ButlerURI(posixpath.join(root, relpath))
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # NOTE(review): marked expectedFailure; presumably the symlink
        # transfer used by the base-class test does not apply to S3 — confirm.
        super().testImportExport()

961 

962 

# Allow the test module to be executed directly (e.g. ``python test_butler.py``).
if __name__ == "__main__":
    unittest.main()