# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
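
    # The S3-specific test case at the bottom of this file is also guarded by
    # @unittest.skipIf(not boto3, ...), so this stub only has to keep the
    # module importable when boto3/moto are unavailable.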

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))
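
# MetricsExample (imported from lsst.daf.butler.tests) packs its three
# constructor arguments into the summary, output, and data attributes that
# the assertions throughout these tests compare against.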


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""
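
    # Concrete subclasses are expected to define a ``configFile`` class
    # attribute (consumed by setUpClass) and to set ``self.tmpConfigFile``
    # to a usable butler config; see the test cases later in this file.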

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The butler constructor registers the run and tag collections, so
        # they should already exist.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at the given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For PosixDatastore this test is equivalent
        to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context
        # managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add
            # tests for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
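
    # BUTLER_ROOT_TAG (imported from repoRelocation above) is the placeholder
    # token that stands in for the repository root in datastore names.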


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"
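
    # An in-memory datastore cannot ingest files (see the note in
    # ButlerTests.testIngest), so the inherited ingest test is disabled.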

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, since otherwise the Config constructor does
        # not know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is specified as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is the equivalent of tempfile.mkdtemp, as it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at the given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        an `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()