Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import numpy as np 

34 

# boto3/botocore/moto are optional dependencies: the S3-backed tests only
# run when they are importable.  When moto is missing we substitute a
# do-nothing decorator so test classes decorated with @mock_s3 still import.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Sentinel used elsewhere (presumably via skip checks) to detect that
    # S3 support is unavailable -- TODO confirm against the rest of the file.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls

46 

47from lsst.utils import doImport 

48from lsst.daf.butler.core.safeFileIo import safeMakeDir 

49from lsst.daf.butler import Butler, Config, ButlerConfig 

50from lsst.daf.butler import StorageClassFactory 

51from lsst.daf.butler import DatasetType, DatasetRef 

52from lsst.daf.butler import FileTemplateValidationError, ValidationError 

53from lsst.daf.butler import FileDataset 

54from lsst.daf.butler import CollectionSearch, CollectionType 

55from lsst.daf.butler.registry import MissingCollectionError 

56from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

57from lsst.daf.butler.core.location import ButlerURI 

58from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

59 unsetAwsEnvCredentials) 

60 

61from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

62 

63TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

64 

65 

def makeExampleMetrics():
    """Return a fixed MetricsExample instance shared by the tests."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

72 

73 

class TransactionTestError(Exception):
    """Dedicated exception type for exercising transaction rollback.

    Raising a unique class avoids misdiagnosing a failure that a generic
    exception raised elsewhere in the code under test could mask.
    """

79 

80 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases."""

    def testSearchPath(self):
        base_config = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without extra search paths, the override directory must not appear
        # in the debug log output.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            plain = ButlerConfig(base_config)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        # Supplying the override directory should show up in the logs and
        # change the resulting configuration.
        override_dir = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            overridden = ButlerConfig(base_config, searchPaths=[override_dir])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(plain[key], overridden[key])
        self.assertEqual(overridden[key], "override_record")

99 

100 

class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations.

    Subclasses are expected to provide ``configFile`` (read in
    `setUpClass`) and ``tmpConfigFile`` (used to construct Butlers).
    """

    # Temporary repository root; removed by tearDown when a subclass sets it.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType, register it, and return it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by all tests in the class, so load them
        # once from the subclass-supplied configuration file.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert each named component fetched via ``butler`` equals the
        matching attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Clean up the temporary repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for one storage class and
        dataset type name, returning the Butler for further checks.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the Butler should have registered both collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long; the
        # database check constraint is expected to reject it.
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            # ref.components will only be populated in certain cases
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            if ref.components:
                self.assertTrue(butler.datastore.exists(ref.components["summary"]))
                self.assertEqual(compRef, ref.components["summary"])
                self.assertTrue(butler.datastore.exists(ref.components["data"]))
            else:
                self.assertTrue(compRef.hasParentId)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check that collections can be supplied per-call rather than at
        Butler construction time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

362 

363 

364class ButlerTests(ButlerPutGetTests): 

365 """Tests for Butler. 

366 """ 

367 useTempRoot = True 

368 

369 def setUp(self): 

370 """Create a new butler root for each test.""" 

371 if self.useTempRoot: 

372 self.root = tempfile.mkdtemp(dir=TESTDIR) 

373 Butler.makeRepo(self.root, config=Config(self.configFile)) 

374 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

375 else: 

376 self.root = None 

377 self.tmpConfigFile = self.configFile 

378 

379 def testConstructor(self): 

380 """Independent test of constructor. 

381 """ 

382 butler = Butler(self.tmpConfigFile, run="ingest") 

383 self.assertIsInstance(butler, Butler) 

384 

385 collections = set(butler.registry.queryCollections()) 

386 self.assertEqual(collections, {"ingest"}) 

387 

388 butler2 = Butler(butler=butler, collections=["other"]) 

389 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"])) 

390 self.assertIsNone(butler2.run) 

391 self.assertIs(butler.registry, butler2.registry) 

392 self.assertIs(butler.datastore, butler2.datastore) 

393 

394 def testBasicPutGet(self): 

395 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

396 self.runPutGetTest(storageClass, "test_metric") 

397 

    def testCompositePutGetConcrete(self):
        """Round-trip a composite storage class that is stored as a single
        file (never disassembled into components).
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled: getURIs must return one primary URI
        # and an empty component mapping.
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset: visit 424 was never stored, so the URI carries
        # the "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

418 

    def testCompositePutGetVirtual(self):
        """Round-trip a composite storage class that may be disassembled into
        per-component files (except for in-memory datastores).
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no primary URI, one URI per declared component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset: visit 424 was never stored, so URIs carry the
        # "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

459 

    def testIngest(self):
        """Ingest externally-created files: first one file per dataset, then
        multiple datasets stored in a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        # One FileDataset (one file, one ref) per detector for visit 423.
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: distinct files, so distinct URIs.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Single FileDataset carrying both refs; MultiDetectorFormatter is
        # responsible for extracting the per-detector payloads.
        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets come from the same file, so the URIs
        # must match.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

552 

553 def testPruneCollections(self): 

554 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

555 butler = Butler(self.tmpConfigFile, writeable=True) 

556 # Load registry data with dimensions to hang datasets off of. 

557 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

558 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

559 # Add some RUN-type collections. 

560 run1 = "run1" 

561 butler.registry.registerRun(run1) 

562 run2 = "run2" 

563 butler.registry.registerRun(run2) 

564 # put some datasets. ref1 and ref2 have the same data ID, and are in 

565 # different runs. ref3 has a different data ID. 

566 metric = makeExampleMetrics() 

567 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

568 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

569 butler.registry) 

570 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

571 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

572 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

573 # Try to delete a RUN collection without purge, or with purge and not 

574 # unstore. 

575 with self.assertRaises(TypeError): 

576 butler.pruneCollection(run1) 

577 with self.assertRaises(TypeError): 

578 butler.pruneCollection(run2, purge=True) 

579 # Add a TAGGED collection and associate ref3 only into it. 

580 tag1 = "tag1" 

581 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

582 butler.registry.associate(tag1, [ref3]) 

583 # Add a CHAINED collection that searches run1 and then run2. It 

584 # logically contains only ref1, because ref2 is shadowed due to them 

585 # having the same data ID and dataset type. 

586 chain1 = "chain1" 

587 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

588 butler.registry.setCollectionChain(chain1, [run1, run2]) 

589 # Try to delete RUN collections, which should fail with complete 

590 # rollback because they're still referenced by the CHAINED 

591 # collection. 

592 with self.assertRaises(Exception): 

593 butler.pruneCollection(run1, pruge=True, unstore=True) 

594 with self.assertRaises(Exception): 

595 butler.pruneCollection(run2, pruge=True, unstore=True) 

596 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

597 [ref1, ref2, ref3]) 

598 self.assertTrue(butler.datastore.exists(ref1)) 

599 self.assertTrue(butler.datastore.exists(ref2)) 

600 self.assertTrue(butler.datastore.exists(ref3)) 

601 # Try to delete CHAINED and TAGGED collections with purge; should not 

602 # work. 

603 with self.assertRaises(TypeError): 

604 butler.pruneCollection(tag1, purge=True, unstore=True) 

605 with self.assertRaises(TypeError): 

606 butler.pruneCollection(chain1, purge=True, unstore=True) 

607 # Remove the tagged collection with unstore=False. This should not 

608 # affect the datasets. 

609 butler.pruneCollection(tag1) 

610 with self.assertRaises(MissingCollectionError): 

611 butler.registry.getCollectionType(tag1) 

612 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

613 [ref1, ref2, ref3]) 

614 self.assertTrue(butler.datastore.exists(ref1)) 

615 self.assertTrue(butler.datastore.exists(ref2)) 

616 self.assertTrue(butler.datastore.exists(ref3)) 

617 # Add the tagged collection back in, and remove it with unstore=True. 

618 # This should remove ref3 only from the datastore. 

619 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

620 butler.registry.associate(tag1, [ref3]) 

621 butler.pruneCollection(tag1, unstore=True) 

622 with self.assertRaises(MissingCollectionError): 

623 butler.registry.getCollectionType(tag1) 

624 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

625 [ref1, ref2, ref3]) 

626 self.assertTrue(butler.datastore.exists(ref1)) 

627 self.assertTrue(butler.datastore.exists(ref2)) 

628 self.assertFalse(butler.datastore.exists(ref3)) 

629 # Delete the chain with unstore=False. The datasets should not be 

630 # affected at all. 

631 butler.pruneCollection(chain1) 

632 with self.assertRaises(MissingCollectionError): 

633 butler.registry.getCollectionType(chain1) 

634 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

635 [ref1, ref2, ref3]) 

636 self.assertTrue(butler.datastore.exists(ref1)) 

637 self.assertTrue(butler.datastore.exists(ref2)) 

638 self.assertFalse(butler.datastore.exists(ref3)) 

639 # Redefine and then delete the chain with unstore=True. Only ref1 

640 # should be unstored (ref3 has already been unstored, but otherwise 

641 # would be now). 

642 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

643 butler.registry.setCollectionChain(chain1, [run1, run2]) 

644 butler.pruneCollection(chain1, unstore=True) 

645 with self.assertRaises(MissingCollectionError): 

646 butler.registry.getCollectionType(chain1) 

647 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

648 [ref1, ref2, ref3]) 

649 self.assertFalse(butler.datastore.exists(ref1)) 

650 self.assertTrue(butler.datastore.exists(ref2)) 

651 self.assertFalse(butler.datastore.exists(ref3)) 

652 # Remove run1. This removes ref1 and ref3 from the registry (they're 

653 # already gone from the datastore, which is fine). 

654 butler.pruneCollection(run1, purge=True, unstore=True) 

655 with self.assertRaises(MissingCollectionError): 

656 butler.registry.getCollectionType(run1) 

657 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

658 [ref2]) 

659 self.assertTrue(butler.datastore.exists(ref2)) 

660 # Remove run2. This removes ref2 from the registry and the datastore. 

661 butler.pruneCollection(run2, purge=True, unstore=True) 

662 with self.assertRaises(MissingCollectionError): 

663 butler.registry.getCollectionType(run2) 

664 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

665 []) 

666 

667 def testPickle(self): 

668 """Test pickle support. 

669 """ 

670 butler = Butler(self.tmpConfigFile, run="ingest") 

671 butlerOut = pickle.loads(pickle.dumps(butler)) 

672 self.assertIsInstance(butlerOut, Butler) 

673 self.assertEqual(butlerOut._config, butler._config) 

674 self.assertEqual(butlerOut.collections, butler.collections) 

675 self.assertEqual(butlerOut.run, butler.run) 

676 

    def testGetDatasetTypes(self):
        """Register several dataset types (and implicitly their component
        types) and validate the Butler configuration against them.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        # Registry must report exactly the parent types plus their components.
        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # NOTE(review): validationCanFail is presumably a class attribute set
        # by concrete subclasses -- confirm elsewhere in the file.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

723 

    def testTransaction(self):
        """Test that an exception inside a Butler transaction rolls back
        both the registry inserts and the datastore write.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                # Deliberately abort so everything above is rolled back.
                raise TransactionTestError("This should roll back the entire transaction")
        # From here on, nothing done inside the transaction should remain.
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

766 

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        # First repo: default config (defaults are not expanded into the
        # written file).
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        # Second repo over the same root: standalone means the written
        # config has all defaults expanded; the registry already exists so
        # it is not recreated.
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        # Re-making the repo without overwrite=True must refuse to clobber
        # the existing one.
        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

811 

812 def testStringification(self): 

813 butler = Butler(self.tmpConfigFile, run="ingest") 

814 butlerStr = str(butler) 

815 

816 if self.datastoreStr is not None: 

817 for testStr in self.datastoreStr: 

818 self.assertIn(testStr, butlerStr) 

819 if self.registryStr is not None: 

820 self.assertIn(self.registryStr, butlerStr) 

821 

822 datastoreName = butler.datastore.name 

823 if self.datastoreName is not None: 

824 for testStr in self.datastoreName: 

825 self.assertIn(testStr, datastoreName) 

826 

827 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Test that file templates control where datasets land on disk and
        that a non-unique template is detected and rejected.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create several almost-identical DatasetTypes (metric1 and metric2
        # will use the default template; metric3 is expected to map to a
        # deliberately bad template in the test configuration).
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        # np.int64 checks that a non-native integer type works in a data ID.
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A different visit maps to the same filename under the bad
        # template, so the put must be refused rather than overwrite.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        # NOTE(review): marked expectedFailure — virtual composites are
        # presumably not yet supported by export/import.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Populate a repo using the given storage class, export all of its
        datasets, import them into a fresh repo, and verify they exist there.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context
        # managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

935 

936 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration backed by a POSIX datastore and SQLite registry.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key present only once defaults have been expanded into the config.
    fullConfigKey = ".datastore.formatters"
    # Template validation is expected to be able to fail for this datastore.
    validationCanFail = True
    # Substrings expected in str(butler) and in the datastore name.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

945 

946 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so the expanded-config key check does not apply.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    # Substrings expected in str(butler) and in the datastore name.
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Ingest needs files on disk; disable the inherited test for the
        # in-memory datastore by overriding it with a no-op.
        pass

959 

960 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler (an in-memory datastore
    chained with two POSIX datastores, per the referenced config)."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Expanded-config key inside the second child datastore (index 1).
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Substrings expected in str(butler) and in the datastore names of the
    # three chained children.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

970 

971 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in
    another."""

    # str(butler) should mention the repo directory, not the config one.
    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory and a config file, pointing back
        at that repo via an explicit "root" entry, in a second directory.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        # Remove the original so the repo is only reachable via configFile2.
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Check the repo artifacts ended up where setUp put them."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

1003 

1004 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo in one temp directory with its config written to a
        differently named file in a second temp directory.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the extra directory this subclass created, then let the
        # base class clean up the repo root.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """Check the externally written config points back at the repo root
        as a proper URI.
        """
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

1033 

1034 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when ``outfile`` is given as a directory rather than a file name."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Unlike the base class, pass the bare directory as outfile;
        # makeRepo is expected to place a butler.yaml inside it.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

1053 

1054 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when ``outfile`` is given as a URI rather than a plain path."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Convert the outfile path to URI form to exercise URI handling.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

1067 

1068 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    # No file-system root, so the expanded-config key check does not apply.
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # NOTE(review): deliberately NOT an f-string — the placeholders are
    # literal; setUp() replaces this attribute with the real bucket/root
    # values before any assertion uses it.
    datastoreName = ["S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        # Trailing slash marks the root as a "directory" key prefix in S3.
        return rndstr + "/"

    def setUp(self):
        """Create the mock S3 bucket and make a Butler repo inside it."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        # Take the bucket name from the config rather than the class default.
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # Override the class-level expectations with the actual root.
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mock bucket and drop dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        # s3CheckFileExists returns a tuple; first element is existence.
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # NOTE(review): import/export presumably does not yet work against
        # the S3 datastore, hence the expectedFailure marker.
        super().testImportExport()

1169 

1170 

if __name__ == "__main__":
    # Allow the tests to be run directly as a script.
    unittest.main()