# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

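# With boto3 left as None, the S3-specific test case at the bottom of this
# module is skipped via @unittest.skipIf(not boto3, ...), so the module still
# imports cleanly when the AWS SDK is not installed.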

from lsst.utils import doImport
from lsst.daf.butler.core.safeFileIo import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.location import ButlerURI
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


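# The three positional arguments to MetricsExample populate its summary,
# output, and data attributes, which the put/get tests below read back both
# whole and as individual components.
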

class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that is not covered by the
    other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to run against
    different Butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

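    # When a DatasetType with a composite storage class is registered, the
    # registry also creates an entry for each component dataset type (see the
    # comment in testGetDatasetTypes below).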

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The run and tag collections should have been registered by the
        # Butler constructor above.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

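                # Recap of the pruneDatasets modes exercised in this loop:
                # the default only disassociates from the tagged collection,
                # unstore=True also deletes the artifact from the datastore,
                # and purge=True (with unstore=True) removes the dataset from
                # the registry as well.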

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            # ref.components will only be populated in certain cases
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            if ref.components:
                self.assertTrue(butler.datastore.exists(ref.components["summary"]))
                self.assertEqual(compRef, ref.components["summary"])
                self.assertTrue(butler.datastore.exists(ref.components["data"]))
            else:
                self.assertTrue(compRef.hasParentId)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains ref1 and ref3 but not ref2, which is shadowed
        # by ref1 because they have the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
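        # Collection contents at this point: run1 = {ref1, ref3},
        # run2 = {ref2}, tag1 = {ref3}; chain1 searches [run1, run2] and so
        # yields ref1 and ref3.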

        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)
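        # Equality of _config, collections, and run serves as the proxy for a
        # successful round-trip here; the registry and datastore are
        # presumably reconstructed from that configuration on unpickling.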

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are created
        # for each component.  We need entries for each component in the test
        # configuration, otherwise validation won't work.  The ones that are
        # deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check whether a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For PosixDatastore this test is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes (all will use the
        # default template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)
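        # The FileExistsError above presumably arises because the metric3
        # template does not include enough dimensions to distinguish visits,
        # so dataId1 and dataId3 render to the same filename.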

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # An in-memory datastore cannot ingest files, so skip the base test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo,
    with outfile given as a directory, works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know the
        # file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo,
    with outfile given as a URI, works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


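# moto's mock_s3 decorator patches boto3/botocore so that every S3 call made
# by the test case below is served by an in-memory fake rather than real AWS.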

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check whether a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For S3Datastore this test is
        equivalent to a `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()