# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

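# When moto is not available, the mock_s3 fallback defined above degrades to
# a no-op decorator and boto3 is None; S3DatastoreButlerTestCase at the end
# of this file is then skipped via its unittest.skipIf decorator instead of
# failing at import time.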

import astropy.time
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


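# The three positional arguments passed to MetricsExample below populate its
# summary, output, and data attributes (assuming the argument order of
# lsst.daf.butler.tests.MetricsExample); the put/get tests compare these
# attributes component by component via assertGetComponents.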

def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that is not covered by the
    other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

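    # Concrete subclasses are expected to define the class attributes used
    # by these tests, in particular ``configFile`` (consumed by setUpClass)
    # and ``tmpConfigFile`` (consumed by the individual tests); see the test
    # cases later in this file.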

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved via butler.get matches
        the corresponding attribute of the reference object.
        """
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The constructor should already have registered the run and tag
        # collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying dataset types can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes (metric1 and metric2
        # will use the default template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo.  It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context
        # managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.
                # Functions in the script folder are generally considered
                # protected and should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportDir, transfer="auto")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = ButlerURI(exportButler.datastore.root, forceDirectory=True)

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Disabled for this configuration: an in-memory datastore can not
        # ingest files.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo in a directory outside of
    the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know
        # the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo at a URI outside of the
    repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


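# The mock_s3 decorator below patches boto3 so that the bucket operations in
# setUp and tearDown run against moto's in-memory S3 fake rather than a real
# AWS endpoint (assuming moto is installed; otherwise the whole test case is
# skipped by the unittest.skipIf decorator).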

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is the equivalent of tempfile.mkdtemp, since this is what
        self.root becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()


if __name__ == "__main__":
    unittest.main()