Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33 

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # S3 support is optional: without boto3 the S3 test cases skip themselves.
    boto3 = None

    def mock_s3(cls):
        """Fallback no-op decorator used when ``moto.mock_s3`` cannot be
        imported; returns the decorated class unchanged.
        """
        return cls

45 

46from lsst.utils import doImport 

47from lsst.daf.butler.core.safeFileIo import safeMakeDir 

48from lsst.daf.butler import Butler, Config, ButlerConfig 

49from lsst.daf.butler import StorageClassFactory 

50from lsst.daf.butler import DatasetType, DatasetRef 

51from lsst.daf.butler import FileTemplateValidationError, ValidationError 

52from lsst.daf.butler import FileDataset 

53from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

54from lsst.daf.butler.core.location import ButlerURI 

55from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

56 unsetAwsEnvCredentials) 

57 

58from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

59 

# Absolute path of the directory containing this test file; used to locate
# the config/ and data/ fixtures.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

61 

62 

def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed test values."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

69 

70 

class TransactionTestError(Exception):
    """Dedicated exception type for transaction tests.

    Raising a distinct type prevents the misdiagnosis that could occur if a
    standard exception were raised and then caught incidentally elsewhere.
    """

76 

77 

class ButlerConfigTests(unittest.TestCase):
    """Simple ButlerConfig checks that are not covered by other test cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCtx:
            baseConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logCtx.output))

        # Supplying the override directory as a search path should make it
        # appear in the debug log output.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCtx:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logCtx.output))

        # The override must actually have changed the configuration value.
        key = ("datastore", "records", "table")
        self.assertNotEqual(baseConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")

96 

97 

class ButlerPutGetTests:
    """Helper suite of put/get tests shared by multiple butler
    configurations.

    Subclasses must provide ``configFile`` (class attribute) and
    ``tmpConfigFile`` (instance attribute), typically via ``setUp``.
    """

    # Repository root created per-test; None means nothing to clean up.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType, register it, and return it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by all tests in the class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert each named component can be fetched and matches the
        corresponding attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            retrieved = butler.get(compTypeName, dataId)
            self.assertEqual(retrieved, getattr(reference, component))

    def tearDown(self):
        # Remove the per-test repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/remove round trips for one storage class and
        dataset type, returning the butler for reuse by callers.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                roundTrip = butler.getDirect(ref)
                self.assertEqual(metric, roundTrip)
                # Test get
                roundTrip = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, roundTrip)
                # Test get with a datasetRef
                roundTrip = butler.get(ref)
                self.assertEqual(metric, roundTrip)
                # Test getDeferred with dataId
                roundTrip = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, roundTrip)
                # Test getDeferred with a datasetRef
                roundTrip = butler.getDeferred(ref).get()
                self.assertEqual(metric, roundTrip)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest", })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    # NOTE(review): the original ``def`` line for this method was lost in
    # extraction; the name below is taken from the upstream daf_butler test
    # suite — confirm against the repository.
    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))

342 

343 

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    # When True a fresh temporary repository root is made for each test.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # Collections only appear after something has been put in them.
        self.assertEqual(butler.registry.getAllCollections(), set())

        # A butler built from another shares registry and datastore.
        butler2 = Butler(butler=butler, collection="other")
        self.assertEqual(butler2.collection, "other")
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")

        # One file per dataset ingest.
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)
            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = [FileDataset(path=metricFile,
                                refs=refs,
                                formatter=MultiDetectorFormatter)]

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Both datasets come from the same file, so URIs must match.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collection, butler.collection)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = butler.registry.getAllDatasetTypes()
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass,
                                                  butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # Everything inside the transaction must have been rolled back.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = butler1.registry.getAllCollections()
        self.assertEqual(collections1, set())
        self.assertEqual(butler2.registry.getAllCollections(), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

635 

636 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second visit through the non-unique template must collide.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

735 

736 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration file used to construct the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Config key expected to be present in the expanded configuration
    # (presumably checked by an inherited test — see the base class).
    fullConfigKey = ".datastore.formatters"
    # Configuration validation is expected to be able to fail for this store.
    validationCanFail = True
    # Substrings expected in the stringified Butler for datastore/registry.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

745 

746 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No full-config key to check for the in-memory configuration.
    fullConfigKey = None
    # No temporary directory is needed; nothing is written to disk.
    useTempRoot = False
    validationCanFail = False
    # Substrings expected in the stringified Butler for datastore/registry;
    # the registry is an in-memory SQLite database.
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Deliberately disable the inherited ingest test — presumably file
        # ingest is not applicable to an in-memory datastore (confirm against
        # the base-class test).
        pass

759 

760 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler: an in-memory datastore
    chained with two Posix datastores (see butler-chained.yaml and the
    expected datastore strings below).
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Index 1 addresses the second datastore in the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # One expected substring per datastore in the chain.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

770 

771 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # The repository proper lives in dir1; its relocated config in dir2.
        self.dir1 = os.path.join(self.root, "dir1")
        self.dir2 = os.path.join(self.root, "dir2")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))
        safeMakeDir(self.dir2)

        # Load the generated config, record an explicit root pointing back at
        # dir1, and rewrite it under a different name in the second directory.
        originalConfig = os.path.join(self.dir1, "butler.yaml")
        relocated = Config(originalConfig)
        relocated["root"] = self.dir1
        movedConfig = os.path.join(self.dir2, "butler2.yaml")
        relocated.dumpToFile(movedConfig)
        os.remove(originalConfig)
        self.tmpConfigFile = movedConfig

    def testFileLocations(self):
        # Config lives only in dir2; the repo contents stay in dir1.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

803 

804 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # One scratch directory for the repo, another for the config file.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Ask makeRepo to write its config somewhere other than the repo.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the extra config directory before delegating cleanup.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        # The externally written config must record the real repo root.
        cfg = Config(self.tmpConfigFile)
        rootUri = ButlerURI(cfg["root"])
        expectedUri = ButlerURI(self.root)
        self.assertEqual(rootUri.geturl(), expectedUri.geturl())
        self.assertNotIn(":", rootUri.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        sc = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(sc, "test_metric")

833 

834 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Here outfile is a *directory*; makeRepo chooses the file name.
        self.tmpConfigFile = self.root2
        cfg = Config(self.configFile)
        Butler.makeRepo(self.root, config=cfg, outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

853 

854 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a full URI rather than a plain filesystem path.
        outPath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(outPath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

867 

868 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # BUG FIX: this was a plain string literal; without the f-prefix the
    # "{bucketName}" and "{root}" placeholders were never interpolated, so
    # the class-level default could not match a real datastore name (compare
    # datastoreStr above, which is an f-string).
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # No placeholders here, so the f-prefix was redundant.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        # Derive the bucket name from the configured datastore root URI.
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        # Empty the mock bucket, then remove it and any dummy credentials.
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    # NOTE(review): presumably fails because the inherited test uses
    # transfer="symlink", which assumes a local filesystem — confirm.
    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()

969 

970 

# Allow this test module to be run directly with python.
if __name__ == "__main__":
    unittest.main()