Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import numpy as np 

34 

35try: 

36 import boto3 

37 import botocore 

38 from moto import mock_s3 

39except ImportError: 

40 boto3 = None 

41 

42 def mock_s3(cls): 

43 """A no-op decorator in case moto mock_s3 can not be imported. 

44 """ 

45 return cls 

46 

47from lsst.utils import doImport 

48from lsst.daf.butler.core.utils import safeMakeDir 

49from lsst.daf.butler import Butler, Config, ButlerConfig 

50from lsst.daf.butler import StorageClassFactory 

51from lsst.daf.butler import DatasetType, DatasetRef 

52from lsst.daf.butler import FileTemplateValidationError, ValidationError 

53from lsst.daf.butler import FileDataset 

54from lsst.daf.butler import CollectionSearch, CollectionType 

55from lsst.daf.butler import ButlerURI 

56from lsst.daf.butler.registry import MissingCollectionError 

57from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

58from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

59 unsetAwsEnvCredentials) 

60 

61from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

62 

63TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

64 

65 

66def makeExampleMetrics(): 

67 return MetricsExample({"AM1": 5.2, "AM2": 30.6}, 

68 {"a": [1, 2, 3], 

69 "b": {"blue": 5, "red": "green"}}, 

70 [563, 234, 456.7, 752, 8, 9, 27] 

71 ) 

72 

73 

74class TransactionTestError(Exception): 

75 """Specific error for testing transactions, to prevent misdiagnosing 

76 that might otherwise occur when a standard exception is used. 

77 """ 

78 pass 

79 

80 

81class ButlerConfigTests(unittest.TestCase): 

82 """Simple tests for ButlerConfig that are not tested in other test cases. 

83 """ 

84 

85 def testSearchPath(self): 

86 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

87 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

88 config1 = ButlerConfig(configFile) 

89 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

90 

91 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

92 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

93 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

94 self.assertIn("testConfigs", "\n".join(cm.output)) 

95 

96 key = ("datastore", "records", "table") 

97 self.assertNotEqual(config1[key], config2[key]) 

98 self.assertEqual(config2[key], "override_record") 

99 

100 

101class ButlerPutGetTests: 

102 """Helper method for running a suite of put/get tests from different 

103 butler configurations.""" 

104 

105 root = None 

106 

107 @staticmethod 

108 def addDatasetType(datasetTypeName, dimensions, storageClass, registry): 

109 """Create a DatasetType and register it 

110 """ 

111 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

112 registry.registerDatasetType(datasetType) 

113 return datasetType 

114 

115 @classmethod 

116 def setUpClass(cls): 

117 cls.storageClassFactory = StorageClassFactory() 

118 cls.storageClassFactory.addFromConfig(cls.configFile) 

119 

120 def assertGetComponents(self, butler, datasetRef, components, reference): 

121 datasetTypeName = datasetRef.datasetType.name 

122 dataId = datasetRef.dataId 

123 for component in components: 

124 compTypeName = DatasetType.nameWithComponent(datasetTypeName, component) 

125 result = butler.get(compTypeName, dataId) 

126 self.assertEqual(result, getattr(reference, component)) 

127 

128 def tearDown(self): 

129 if self.root is not None and os.path.exists(self.root): 

130 shutil.rmtree(self.root, ignore_errors=True) 

131 

132 def runPutGetTest(self, storageClass, datasetTypeName): 

133 # New datasets will be added to run and tag, but we will only look in 

134 # tag when looking up datasets. 

135 run = "ingest/run" 

136 tag = "ingest" 

137 butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag]) 

138 

139 # There will not be a collection yet 

140 collections = set(butler.registry.queryCollections()) 

141 self.assertEqual(collections, set([run, tag])) 

142 

143 # Create and register a DatasetType 

144 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

145 

146 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

147 

148 # Try to create one that will have a name that is too long 

149 with self.assertRaises(Exception) as cm: 

150 self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry) 

151 self.assertIn("check constraint", str(cm.exception).lower()) 

152 

153 # Add needed Dimensions 

154 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

155 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

156 "name": "d-r", 

157 "abstract_filter": "R"}) 

158 butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp", 

159 "id": 1, 

160 "name": "default"}) 

161 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

162 "name": "fourtwentythree", "physical_filter": "d-r", 

163 "visit_system": 1}) 

164 

165 # Add a second visit for some later tests 

166 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424, 

167 "name": "fourtwentyfour", "physical_filter": "d-r", 

168 "visit_system": 1}) 

169 

170 # Create and store a dataset 

171 metric = makeExampleMetrics() 

172 dataId = {"instrument": "DummyCamComp", "visit": 423} 

173 

174 # Create a DatasetRef for put 

175 refIn = DatasetRef(datasetType, dataId, id=None) 

176 

177 # Put with a preexisting id should fail 

178 with self.assertRaises(ValueError): 

179 butler.put(metric, DatasetRef(datasetType, dataId, id=100)) 

180 

181 # Put and remove the dataset once as a DatasetRef, once as a dataId, 

182 # and once with a DatasetType 

183 for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)): 

184 with self.subTest(args=args): 

185 ref = butler.put(metric, *args) 

186 self.assertIsInstance(ref, DatasetRef) 

187 

188 # Test getDirect 

189 metricOut = butler.getDirect(ref) 

190 self.assertEqual(metric, metricOut) 

191 # Test get 

192 metricOut = butler.get(ref.datasetType.name, dataId) 

193 self.assertEqual(metric, metricOut) 

194 # Test get with a datasetRef 

195 metricOut = butler.get(ref) 

196 self.assertEqual(metric, metricOut) 

197 # Test getDeferred with dataId 

198 metricOut = butler.getDeferred(ref.datasetType.name, dataId).get() 

199 self.assertEqual(metric, metricOut) 

200 # Test getDeferred with a datasetRef 

201 metricOut = butler.getDeferred(ref).get() 

202 self.assertEqual(metric, metricOut) 

203 

204 # Check we can get components 

205 if storageClass.isComposite(): 

206 self.assertGetComponents(butler, ref, 

207 ("summary", "data", "output"), metric) 

208 

209 # Remove from the tagged collection only; after that we 

210 # shouldn't be able to find it unless we use the dataset_id. 

211 butler.pruneDatasets([ref]) 

212 with self.assertRaises(LookupError): 

213 butler.datasetExists(*args) 

214 # Registry still knows about it, if we use the dataset_id. 

215 self.assertEqual(butler.registry.getDataset(ref.id), ref) 

216 # If we use the output ref with the dataset_id, we should 

217 # still be able to load it with getDirect(). 

218 self.assertEqual(metric, butler.getDirect(ref)) 

219 

220 # Reinsert into collection, then delete from Datastore *and* 

221 # remove from collection. 

222 butler.registry.associate(tag, [ref]) 

223 butler.pruneDatasets([ref], unstore=True) 

224 # Lookup with original args should still fail. 

225 with self.assertRaises(LookupError): 

226 butler.datasetExists(*args) 

227 # Now getDirect() should fail, too. 

228 with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"): 

229 butler.getDirect(ref) 

230 # Registry still knows about it, if we use the dataset_id. 

231 self.assertEqual(butler.registry.getDataset(ref.id), ref) 

232 

233 # Now remove the dataset completely. 

234 butler.pruneDatasets([ref], purge=True, unstore=True) 

235 # Lookup with original args should still fail. 

236 with self.assertRaises(LookupError): 

237 butler.datasetExists(*args) 

238 # getDirect() should still fail. 

239 with self.assertRaises(FileNotFoundError): 

240 butler.getDirect(ref) 

241 # Registry shouldn't be able to find it by dataset_id anymore. 

242 self.assertIsNone(butler.registry.getDataset(ref.id)) 

243 

244 # Put the dataset again, since the last thing we did was remove it. 

245 ref = butler.put(metric, refIn) 

246 

247 # Get with parameters 

248 stop = 4 

249 sliced = butler.get(ref, parameters={"slice": slice(stop)}) 

250 self.assertNotEqual(metric, sliced) 

251 self.assertEqual(metric.summary, sliced.summary) 

252 self.assertEqual(metric.output, sliced.output) 

253 self.assertEqual(metric.data[:stop], sliced.data) 

254 # getDeferred with parameters 

255 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get() 

256 self.assertNotEqual(metric, sliced) 

257 self.assertEqual(metric.summary, sliced.summary) 

258 self.assertEqual(metric.output, sliced.output) 

259 self.assertEqual(metric.data[:stop], sliced.data) 

260 # getDeferred with deferred parameters 

261 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)}) 

262 self.assertNotEqual(metric, sliced) 

263 self.assertEqual(metric.summary, sliced.summary) 

264 self.assertEqual(metric.output, sliced.output) 

265 self.assertEqual(metric.data[:stop], sliced.data) 

266 

267 if storageClass.isComposite(): 

268 # Check that components can be retrieved 

269 # ref.compfonents will only be populated in certain cases 

270 metricOut = butler.get(ref.datasetType.name, dataId) 

271 compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary") 

272 compNameD = DatasetType.nameWithComponent(datasetTypeName, "data") 

273 summary = butler.get(compNameS, dataId) 

274 self.assertEqual(summary, metric.summary) 

275 data = butler.get(compNameD, dataId) 

276 self.assertEqual(data, metric.data) 

277 

278 compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections) 

279 self.assertTrue(compRef.hasParentId) 

280 

281 # Create a Dataset type that has the same name but is inconsistent. 

282 inconsistentDatasetType = DatasetType(datasetTypeName, dimensions, 

283 self.storageClassFactory.getStorageClass("Config")) 

284 

285 # Getting with a dataset type that does not match registry fails 

286 with self.assertRaises(ValueError): 

287 butler.get(inconsistentDatasetType, dataId) 

288 

289 # Combining a DatasetRef with a dataId should fail 

290 with self.assertRaises(ValueError): 

291 butler.get(ref, dataId) 

292 # Getting with an explicit ref should fail if the id doesn't match 

293 with self.assertRaises(ValueError): 

294 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101)) 

295 

296 # Getting a dataset with unknown parameters should fail 

297 with self.assertRaises(KeyError): 

298 butler.get(ref, parameters={"unsupported": True}) 

299 

300 # Check we have a collection 

301 collections = set(butler.registry.queryCollections()) 

302 self.assertEqual(collections, {run, tag}) 

303 

304 # Clean up to check that we can remove something that may have 

305 # already had a component removed 

306 butler.pruneDatasets([ref], unstore=True, purge=True) 

307 

308 # Add a dataset back in since some downstream tests require 

309 # something to be present 

310 ref = butler.put(metric, refIn) 

311 

312 return butler 

313 

314 def testDeferredCollectionPassing(self): 

315 # Construct a butler with no run or collection, but make it writeable. 

316 butler = Butler(self.tmpConfigFile, writeable=True) 

317 # Create and register a DatasetType 

318 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

319 datasetType = self.addDatasetType("example", dimensions, 

320 self.storageClassFactory.getStorageClass("StructuredData"), 

321 butler.registry) 

322 # Add needed Dimensions 

323 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

324 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

325 "name": "d-r", 

326 "abstract_filter": "R"}) 

327 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

328 "name": "fourtwentythree", "physical_filter": "d-r"}) 

329 dataId = {"instrument": "DummyCamComp", "visit": 423} 

330 # Create dataset. 

331 metric = makeExampleMetrics() 

332 # Register a new run and put dataset. 

333 run = "deferred" 

334 butler.registry.registerRun(run) 

335 ref = butler.put(metric, datasetType, dataId, run=run) 

336 # Putting with no run should fail with TypeError. 

337 with self.assertRaises(TypeError): 

338 butler.put(metric, datasetType, dataId) 

339 # Dataset should exist. 

340 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

341 # We should be able to get the dataset back, but with and without 

342 # a deferred dataset handle. 

343 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run])) 

344 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get()) 

345 # Trying to find the dataset without any collection is a TypeError. 

346 with self.assertRaises(TypeError): 

347 butler.datasetExists(datasetType, dataId) 

348 with self.assertRaises(TypeError): 

349 butler.get(datasetType, dataId) 

350 # Associate the dataset with a different collection. 

351 butler.registry.registerCollection("tagged") 

352 butler.registry.associate("tagged", [ref]) 

353 # Deleting the dataset from the new collection should make it findable 

354 # in the original collection. 

355 butler.pruneDatasets([ref], tags=["tagged"]) 

356 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

357 

358 

359class ButlerTests(ButlerPutGetTests): 

360 """Tests for Butler. 

361 """ 

362 useTempRoot = True 

363 

364 def setUp(self): 

365 """Create a new butler root for each test.""" 

366 if self.useTempRoot: 

367 self.root = tempfile.mkdtemp(dir=TESTDIR) 

368 Butler.makeRepo(self.root, config=Config(self.configFile)) 

369 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

370 else: 

371 self.root = None 

372 self.tmpConfigFile = self.configFile 

373 

374 def testConstructor(self): 

375 """Independent test of constructor. 

376 """ 

377 butler = Butler(self.tmpConfigFile, run="ingest") 

378 self.assertIsInstance(butler, Butler) 

379 

380 collections = set(butler.registry.queryCollections()) 

381 self.assertEqual(collections, {"ingest"}) 

382 

383 butler2 = Butler(butler=butler, collections=["other"]) 

384 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"])) 

385 self.assertIsNone(butler2.run) 

386 self.assertIs(butler.registry, butler2.registry) 

387 self.assertIs(butler.datastore, butler2.datastore) 

388 

389 def testBasicPutGet(self): 

390 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

391 self.runPutGetTest(storageClass, "test_metric") 

392 

393 def testCompositePutGetConcrete(self): 

394 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

395 butler = self.runPutGetTest(storageClass, "test_metric") 

396 

397 # Should *not* be disassembled 

398 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

399 self.assertEqual(len(datasets), 1) 

400 uri, components = butler.getURIs(datasets[0]) 

401 self.assertIsInstance(uri, ButlerURI) 

402 self.assertFalse(components) 

403 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

404 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

405 

406 # Predicted dataset 

407 dataId = {"instrument": "DummyCamComp", "visit": 424} 

408 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

409 self.assertFalse(components) 

410 self.assertIsInstance(uri, ButlerURI) 

411 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

412 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

413 

414 def testCompositePutGetVirtual(self): 

415 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

416 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

417 

418 # Should be disassembled 

419 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

420 self.assertEqual(len(datasets), 1) 

421 uri, components = butler.getURIs(datasets[0]) 

422 

423 if butler.datastore.isEphemeral: 

424 # Never disassemble in-memory datastore 

425 self.assertIsInstance(uri, ButlerURI) 

426 self.assertFalse(components) 

427 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

428 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

429 else: 

430 self.assertIsNone(uri) 

431 self.assertEqual(set(components), set(storageClass.components)) 

432 for compuri in components.values(): 

433 self.assertIsInstance(compuri, ButlerURI) 

434 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

435 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

436 

437 # Predicted dataset 

438 dataId = {"instrument": "DummyCamComp", "visit": 424} 

439 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

440 

441 if butler.datastore.isEphemeral: 

442 # Never disassembled 

443 self.assertIsInstance(uri, ButlerURI) 

444 self.assertFalse(components) 

445 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

446 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

447 else: 

448 self.assertIsNone(uri) 

449 self.assertEqual(set(components), set(storageClass.components)) 

450 for compuri in components.values(): 

451 self.assertIsInstance(compuri, ButlerURI) 

452 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

453 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

454 

455 def testIngest(self): 

456 butler = Butler(self.tmpConfigFile, run="ingest") 

457 

458 # Create and register a DatasetType 

459 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"]) 

460 

461 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

462 datasetTypeName = "metric" 

463 

464 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

465 

466 # Add needed Dimensions 

467 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

468 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

469 "name": "d-r", 

470 "abstract_filter": "R"}) 

471 for detector in (1, 2): 

472 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector, 

473 "full_name": f"detector{detector}"}) 

474 

475 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

476 "name": "fourtwentythree", "physical_filter": "d-r"}, 

477 {"instrument": "DummyCamComp", "id": 424, 

478 "name": "fourtwentyfour", "physical_filter": "d-r"}) 

479 

480 formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter") 

481 dataRoot = os.path.join(TESTDIR, "data", "basic") 

482 datasets = [] 

483 for detector in (1, 2): 

484 detector_name = f"detector_{detector}" 

485 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

486 dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

487 # Create a DatasetRef for ingest 

488 refIn = DatasetRef(datasetType, dataId, id=None) 

489 

490 datasets.append(FileDataset(path=metricFile, 

491 refs=[refIn], 

492 formatter=formatter)) 

493 

494 butler.ingest(*datasets, transfer="copy") 

495 

496 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

497 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

498 

499 metrics1 = butler.get(datasetTypeName, dataId1) 

500 metrics2 = butler.get(datasetTypeName, dataId2) 

501 self.assertNotEqual(metrics1, metrics2) 

502 

503 # Compare URIs 

504 uri1 = butler.getURI(datasetTypeName, dataId1) 

505 uri2 = butler.getURI(datasetTypeName, dataId2) 

506 self.assertNotEqual(uri1, uri2) 

507 

508 # Now do a multi-dataset but single file ingest 

509 metricFile = os.path.join(dataRoot, "detectors.yaml") 

510 refs = [] 

511 for detector in (1, 2): 

512 detector_name = f"detector_{detector}" 

513 dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

514 # Create a DatasetRef for ingest 

515 refs.append(DatasetRef(datasetType, dataId, id=None)) 

516 

517 datasets = [] 

518 datasets.append(FileDataset(path=metricFile, 

519 refs=refs, 

520 formatter=MultiDetectorFormatter)) 

521 

522 butler.ingest(*datasets, transfer="copy") 

523 

524 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

525 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

526 

527 multi1 = butler.get(datasetTypeName, dataId1) 

528 multi2 = butler.get(datasetTypeName, dataId2) 

529 

530 self.assertEqual(multi1, metrics1) 

531 self.assertEqual(multi2, metrics2) 

532 

533 # Compare URIs 

534 uri1 = butler.getURI(datasetTypeName, dataId1) 

535 uri2 = butler.getURI(datasetTypeName, dataId2) 

536 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

537 

538 # Test that removing one does not break the second 

539 # This line will issue a warning log message for a ChainedDatastore 

540 # that uses an InMemoryDatastore since in-memory can not ingest 

541 # files. 

542 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

543 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

544 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

545 multi2b = butler.get(datasetTypeName, dataId2) 

546 self.assertEqual(multi2, multi2b) 

547 

548 def testPruneCollections(self): 

549 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

550 butler = Butler(self.tmpConfigFile, writeable=True) 

551 # Load registry data with dimensions to hang datasets off of. 

552 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

553 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

554 # Add some RUN-type collections. 

555 run1 = "run1" 

556 butler.registry.registerRun(run1) 

557 run2 = "run2" 

558 butler.registry.registerRun(run2) 

559 # put some datasets. ref1 and ref2 have the same data ID, and are in 

560 # different runs. ref3 has a different data ID. 

561 metric = makeExampleMetrics() 

562 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

563 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

564 butler.registry) 

565 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

566 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

567 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

568 # Try to delete a RUN collection without purge, or with purge and not 

569 # unstore. 

570 with self.assertRaises(TypeError): 

571 butler.pruneCollection(run1) 

572 with self.assertRaises(TypeError): 

573 butler.pruneCollection(run2, purge=True) 

574 # Add a TAGGED collection and associate ref3 only into it. 

575 tag1 = "tag1" 

576 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

577 butler.registry.associate(tag1, [ref3]) 

578 # Add a CHAINED collection that searches run1 and then run2. It 

579 # logically contains only ref1, because ref2 is shadowed due to them 

580 # having the same data ID and dataset type. 

581 chain1 = "chain1" 

582 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

583 butler.registry.setCollectionChain(chain1, [run1, run2]) 

584 # Try to delete RUN collections, which should fail with complete 

585 # rollback because they're still referenced by the CHAINED 

586 # collection. 

587 with self.assertRaises(Exception): 

588 butler.pruneCollection(run1, pruge=True, unstore=True) 

589 with self.assertRaises(Exception): 

590 butler.pruneCollection(run2, pruge=True, unstore=True) 

591 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

592 [ref1, ref2, ref3]) 

593 self.assertTrue(butler.datastore.exists(ref1)) 

594 self.assertTrue(butler.datastore.exists(ref2)) 

595 self.assertTrue(butler.datastore.exists(ref3)) 

596 # Try to delete CHAINED and TAGGED collections with purge; should not 

597 # work. 

598 with self.assertRaises(TypeError): 

599 butler.pruneCollection(tag1, purge=True, unstore=True) 

600 with self.assertRaises(TypeError): 

601 butler.pruneCollection(chain1, purge=True, unstore=True) 

602 # Remove the tagged collection with unstore=False. This should not 

603 # affect the datasets. 

604 butler.pruneCollection(tag1) 

605 with self.assertRaises(MissingCollectionError): 

606 butler.registry.getCollectionType(tag1) 

607 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

608 [ref1, ref2, ref3]) 

609 self.assertTrue(butler.datastore.exists(ref1)) 

610 self.assertTrue(butler.datastore.exists(ref2)) 

611 self.assertTrue(butler.datastore.exists(ref3)) 

612 # Add the tagged collection back in, and remove it with unstore=True. 

613 # This should remove ref3 only from the datastore. 

614 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

615 butler.registry.associate(tag1, [ref3]) 

616 butler.pruneCollection(tag1, unstore=True) 

617 with self.assertRaises(MissingCollectionError): 

618 butler.registry.getCollectionType(tag1) 

619 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

620 [ref1, ref2, ref3]) 

621 self.assertTrue(butler.datastore.exists(ref1)) 

622 self.assertTrue(butler.datastore.exists(ref2)) 

623 self.assertFalse(butler.datastore.exists(ref3)) 

624 # Delete the chain with unstore=False. The datasets should not be 

625 # affected at all. 

626 butler.pruneCollection(chain1) 

627 with self.assertRaises(MissingCollectionError): 

628 butler.registry.getCollectionType(chain1) 

629 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

630 [ref1, ref2, ref3]) 

631 self.assertTrue(butler.datastore.exists(ref1)) 

632 self.assertTrue(butler.datastore.exists(ref2)) 

633 self.assertFalse(butler.datastore.exists(ref3)) 

634 # Redefine and then delete the chain with unstore=True. Only ref1 

635 # should be unstored (ref3 has already been unstored, but otherwise 

636 # would be now). 

637 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

638 butler.registry.setCollectionChain(chain1, [run1, run2]) 

639 butler.pruneCollection(chain1, unstore=True) 

640 with self.assertRaises(MissingCollectionError): 

641 butler.registry.getCollectionType(chain1) 

642 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

643 [ref1, ref2, ref3]) 

644 self.assertFalse(butler.datastore.exists(ref1)) 

645 self.assertTrue(butler.datastore.exists(ref2)) 

646 self.assertFalse(butler.datastore.exists(ref3)) 

647 # Remove run1. This removes ref1 and ref3 from the registry (they're 

648 # already gone from the datastore, which is fine). 

649 butler.pruneCollection(run1, purge=True, unstore=True) 

650 with self.assertRaises(MissingCollectionError): 

651 butler.registry.getCollectionType(run1) 

652 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

653 [ref2]) 

654 self.assertTrue(butler.datastore.exists(ref2)) 

655 # Remove run2. This removes ref2 from the registry and the datastore. 

656 butler.pruneCollection(run2, purge=True, unstore=True) 

657 with self.assertRaises(MissingCollectionError): 

658 butler.registry.getCollectionType(run2) 

659 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

660 []) 

661 

662 def testPickle(self): 

663 """Test pickle support. 

664 """ 

665 butler = Butler(self.tmpConfigFile, run="ingest") 

666 butlerOut = pickle.loads(pickle.dumps(butler)) 

667 self.assertIsInstance(butlerOut, Butler) 

668 self.assertEqual(butlerOut._config, butler._config) 

669 self.assertEqual(butlerOut.collections, butler.collections) 

670 self.assertEqual(butlerOut.run, butler.run) 

671 

672 def testGetDatasetTypes(self): 

673 butler = Butler(self.tmpConfigFile, run="ingest") 

674 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"]) 

675 dimensionEntries = [ 

676 ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"}, 

677 {"instrument": "DummyCamComp"}), 

678 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}), 

679 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}) 

680 ] 

681 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

682 # Add needed Dimensions 

683 for args in dimensionEntries: 

684 butler.registry.insertDimensionData(*args) 

685 

686 # When a DatasetType is added to the registry entries are created 

687 # for each component. Need entries for each component in the test 

688 # configuration otherwise validation won't work. The ones that 

689 # are deliberately broken will be ignored later. 

690 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"} 

691 components = set() 

692 for datasetTypeName in datasetTypeNames: 

693 # Create and register a DatasetType 

694 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

695 

696 for componentName in storageClass.components: 

697 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

698 

699 fromRegistry = set(butler.registry.queryDatasetTypes(components=True)) 

700 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

701 

702 # Now that we have some dataset types registered, validate them 

703 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

704 "datasetType.component"]) 

705 

706 # Add a new datasetType that will fail template validation 

707 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

708 if self.validationCanFail: 

709 with self.assertRaises(ValidationError): 

710 butler.validateConfiguration() 

711 

712 # Rerun validation but with a subset of dataset type names 

713 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

714 

715 # Rerun validation but ignore the bad datasetType 

716 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

717 "datasetType.component"]) 

718 

719 def testTransaction(self): 

720 butler = Butler(self.tmpConfigFile, run="ingest") 

721 datasetTypeName = "test_metric" 

722 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

723 dimensionEntries = (("instrument", {"instrument": "DummyCam"}), 

724 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", 

725 "abstract_filter": "R"}), 

726 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", 

727 "physical_filter": "d-r"})) 

728 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

729 metric = makeExampleMetrics() 

730 dataId = {"instrument": "DummyCam", "visit": 42} 

731 # Create and register a DatasetType 

732 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

733 with self.assertRaises(TransactionTestError): 

734 with butler.transaction(): 

735 # Add needed Dimensions 

736 for args in dimensionEntries: 

737 butler.registry.insertDimensionData(*args) 

738 # Store a dataset 

739 ref = butler.put(metric, datasetTypeName, dataId) 

740 self.assertIsInstance(ref, DatasetRef) 

741 # Test getDirect 

742 metricOut = butler.getDirect(ref) 

743 self.assertEqual(metric, metricOut) 

744 # Test get 

745 metricOut = butler.get(datasetTypeName, dataId) 

746 self.assertEqual(metric, metricOut) 

747 # Check we can get components 

748 self.assertGetComponents(butler, ref, 

749 ("summary", "data", "output"), metric) 

750 raise TransactionTestError("This should roll back the entire transaction") 

751 with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"): 

752 butler.registry.expandDataId(dataId) 

753 # Should raise LookupError for missing data ID value 

754 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

755 butler.get(datasetTypeName, dataId) 

756 # Also check explicitly if Dataset entry is missing 

757 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

758 # Direct retrieval should not find the file in the Datastore 

759 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

760 butler.getDirect(ref) 

761 

762 def testMakeRepo(self): 

763 """Test that we can write butler configuration to a new repository via 

764 the Butler.makeRepo interface and then instantiate a butler from the 

765 repo root. 

766 """ 

767 # Do not run the test if we know this datastore configuration does 

768 # not support a file system root 

769 if self.fullConfigKey is None: 

770 return 

771 

772 # Remove the file created in setUp 

773 os.unlink(self.tmpConfigFile) 

774 

775 butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile)) 

776 limited = Config(self.configFile) 

777 butler1 = Butler(butlerConfig) 

778 butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False, 

779 config=Config(self.configFile), overwrite=True) 

780 full = Config(self.tmpConfigFile) 

781 butler2 = Butler(butlerConfig) 

782 # Butlers should have the same configuration regardless of whether 

783 # defaults were expanded. 

784 self.assertEqual(butler1._config, butler2._config) 

785 # Config files loaded directly should not be the same. 

786 self.assertNotEqual(limited, full) 

787 # Make sure "limited" doesn't have a few keys we know it should be 

788 # inheriting from defaults. 

789 self.assertIn(self.fullConfigKey, full) 

790 self.assertNotIn(self.fullConfigKey, limited) 

791 

792 # Collections don't appear until something is put in them 

793 collections1 = set(butler1.registry.queryCollections()) 

794 self.assertEqual(collections1, set()) 

795 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

796 

797 # Check that a config with no associated file name will not 

798 # work properly with relocatable Butler repo 

799 butlerConfig.configFile = None 

800 with self.assertRaises(ValueError): 

801 Butler(butlerConfig) 

802 

803 with self.assertRaises(FileExistsError): 

804 Butler.makeRepo(self.root, standalone=True, createRegistry=False, 

805 config=Config(self.configFile), overwrite=False) 

806 

807 def testStringification(self): 

808 butler = Butler(self.tmpConfigFile, run="ingest") 

809 butlerStr = str(butler) 

810 

811 if self.datastoreStr is not None: 

812 for testStr in self.datastoreStr: 

813 self.assertIn(testStr, butlerStr) 

814 if self.registryStr is not None: 

815 self.assertIn(self.registryStr, butlerStr) 

816 

817 datastoreName = butler.datastore.name 

818 if self.datastoreName is not None: 

819 for testStr in self.datastoreName: 

820 self.assertIn(testStr, datastoreName) 

821 

822 

823class FileLikeDatastoreButlerTests(ButlerTests): 

824 """Common tests and specialization of ButlerTests for butlers backed 

825 by datastores that inherit from FileLikeDatastore. 

826 """ 

827 

828 def checkFileExists(self, root, path): 

829 """Checks if file exists at a given path (relative to root). 

830 

831 Test testPutTemplates verifies actual physical existance of the files 

832 in the requested location. For POSIXDatastore this test is equivalent 

833 to `os.path.exists` call. 

834 """ 

835 return os.path.exists(os.path.join(root, path)) 

836 

837 def testPutTemplates(self): 

838 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

839 butler = Butler(self.tmpConfigFile, run="ingest") 

840 

841 # Add needed Dimensions 

842 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

843 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

844 "name": "d-r", 

845 "abstract_filter": "R"}) 

846 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", 

847 "physical_filter": "d-r"}) 

848 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", 

849 "physical_filter": "d-r"}) 

850 

851 # Create and store a dataset 

852 metric = makeExampleMetrics() 

853 

854 # Create two almost-identical DatasetTypes (both will use default 

855 # template) 

856 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

857 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

858 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

859 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

860 

861 dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)} 

862 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

863 dataId3 = {"instrument": "DummyCamComp", "visit": 425} 

864 

865 # Put with exactly the data ID keys needed 

866 ref = butler.put(metric, "metric1", dataId1) 

867 self.assertTrue(self.checkFileExists(butler.datastore.root, 

868 "ingest/metric1/d-r/DummyCamComp_423.pickle")) 

869 

870 # Check the template based on dimensions 

871 butler.datastore.templates.validateTemplates([ref]) 

872 

873 # Put with extra data ID keys (physical_filter is an optional 

874 # dependency); should not change template (at least the way we're 

875 # defining them to behave now; the important thing is that they 

876 # must be consistent). 

877 ref = butler.put(metric, "metric2", dataId2) 

878 self.assertTrue(self.checkFileExists(butler.datastore.root, 

879 "ingest/metric2/d-r/DummyCamComp_v423.pickle")) 

880 

881 # Check the template based on dimensions 

882 butler.datastore.templates.validateTemplates([ref]) 

883 

884 # Now use a file template that will not result in unique filenames 

885 ref = butler.put(metric, "metric3", dataId1) 

886 

887 # Check the template based on dimensions. This one is a bad template 

888 with self.assertRaises(FileTemplateValidationError): 

889 butler.datastore.templates.validateTemplates([ref]) 

890 

891 with self.assertRaises(FileExistsError): 

892 butler.put(metric, "metric3", dataId3) 

893 

894 def testImportExport(self): 

895 # Run put/get tests just to create and populate a repo. 

896 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

897 self.runImportExportTest(storageClass) 

898 

899 @unittest.expectedFailure 

900 def testImportExportVirtualComposite(self): 

901 # Run put/get tests just to create and populate a repo. 

902 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

903 self.runImportExportTest(storageClass) 

904 

905 def runImportExportTest(self, storageClass): 

906 exportButler = self.runPutGetTest(storageClass, "test_metric") 

907 # Test that the repo actually has at least one dataset. 

908 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

909 self.assertGreater(len(datasets), 0) 

910 # Export those datasets. We used TemporaryDirectory because there 

911 # doesn't seem to be a way to get the filename (as opposed to the file 

912 # object) from any of tempfile's temporary-file context managers. 

913 with tempfile.TemporaryDirectory() as exportDir: 

914 # TODO: When PosixDatastore supports transfer-on-exist, add tests 

915 # for that. 

916 exportFile = os.path.join(exportDir, "exports.yaml") 

917 with exportButler.export(filename=exportFile) as export: 

918 export.saveDatasets(datasets) 

919 self.assertTrue(os.path.exists(exportFile)) 

920 with tempfile.TemporaryDirectory() as importDir: 

921 Butler.makeRepo(importDir, config=Config(self.configFile)) 

922 importButler = Butler(importDir, run="ingest/run") 

923 importButler.import_(filename=exportFile, directory=exportButler.datastore.root, 

924 transfer="symlink") 

925 for ref in datasets: 

926 with self.subTest(ref=ref): 

927 # Test for existence by passing in the DatasetType and 

928 # data ID separately, to avoid lookup by dataset_id. 

929 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

930 

931 

932class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase): 

933 """PosixDatastore specialization of a butler""" 

934 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

935 fullConfigKey = ".datastore.formatters" 

936 validationCanFail = True 

937 datastoreStr = ["/tmp"] 

938 datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"] 

939 registryStr = "/gen3.sqlite3" 

940 

941 

942class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

943 """InMemoryDatastore specialization of a butler""" 

944 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

945 fullConfigKey = None 

946 useTempRoot = False 

947 validationCanFail = False 

948 datastoreStr = ["datastore='InMemory"] 

949 datastoreName = ["InMemoryDatastore@"] 

950 registryStr = ":memory:" 

951 

952 def testIngest(self): 

953 pass 

954 

955 

956class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

957 """PosixDatastore specialization""" 

958 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

959 fullConfigKey = ".datastore.datastores.1.formatters" 

960 validationCanFail = True 

961 datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"] 

962 datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1", 

963 "SecondDatastore"] 

964 registryStr = "/gen3.sqlite3" 

965 

966 

967class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

968 """Test that a yaml file in one location can refer to a root in another.""" 

969 

970 datastoreStr = ["dir1"] 

971 # Disable the makeRepo test since we are deliberately not using 

972 # butler.yaml as the config name. 

973 fullConfigKey = None 

974 

975 def setUp(self): 

976 self.root = tempfile.mkdtemp(dir=TESTDIR) 

977 

978 # Make a new repository in one place 

979 self.dir1 = os.path.join(self.root, "dir1") 

980 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

981 

982 # Move the yaml file to a different place and add a "root" 

983 self.dir2 = os.path.join(self.root, "dir2") 

984 safeMakeDir(self.dir2) 

985 configFile1 = os.path.join(self.dir1, "butler.yaml") 

986 config = Config(configFile1) 

987 config["root"] = self.dir1 

988 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

989 config.dumpToFile(configFile2) 

990 os.remove(configFile1) 

991 self.tmpConfigFile = configFile2 

992 

993 def testFileLocations(self): 

994 self.assertNotEqual(self.dir1, self.dir2) 

995 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

996 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

997 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

998 

999 

1000class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1001 """Test that a config file created by makeRepo outside of repo works.""" 

1002 

1003 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1004 

1005 def setUp(self): 

1006 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1007 self.root2 = tempfile.mkdtemp(dir=TESTDIR) 

1008 

1009 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1010 Butler.makeRepo(self.root, config=Config(self.configFile), 

1011 outfile=self.tmpConfigFile) 

1012 

1013 def tearDown(self): 

1014 if os.path.exists(self.root2): 

1015 shutil.rmtree(self.root2, ignore_errors=True) 

1016 super().tearDown() 

1017 

1018 def testConfigExistence(self): 

1019 c = Config(self.tmpConfigFile) 

1020 uri_config = ButlerURI(c["root"]) 

1021 uri_expected = ButlerURI(self.root, forceDirectory=True) 

1022 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1023 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1024 

1025 def testPutGet(self): 

1026 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1027 self.runPutGetTest(storageClass, "test_metric") 

1028 

1029 

1030class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1031 """Test that a config file created by makeRepo outside of repo works.""" 

1032 

1033 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1034 

1035 def setUp(self): 

1036 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1037 self.root2 = tempfile.mkdtemp(dir=TESTDIR) 

1038 

1039 self.tmpConfigFile = self.root2 

1040 Butler.makeRepo(self.root, config=Config(self.configFile), 

1041 outfile=self.tmpConfigFile) 

1042 

1043 def testConfigExistence(self): 

1044 # Append the yaml file else Config constructor does not know the file 

1045 # type. 

1046 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1047 super().testConfigExistence() 

1048 

1049 

1050class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1051 """Test that a config file created by makeRepo outside of repo works.""" 

1052 

1053 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1054 

1055 def setUp(self): 

1056 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1057 self.root2 = tempfile.mkdtemp(dir=TESTDIR) 

1058 

1059 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl() 

1060 Butler.makeRepo(self.root, config=Config(self.configFile), 

1061 outfile=self.tmpConfigFile) 

1062 

1063 

1064@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1065@mock_s3 

1066class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase): 

1067 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1068 a local in-memory SqlRegistry. 

1069 """ 

1070 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1071 fullConfigKey = None 

1072 validationCanFail = True 

1073 

1074 bucketName = "anybucketname" 

1075 """Name of the Bucket that will be used in the tests. The name is read from 

1076 the config file used with the tests during set-up. 

1077 """ 

1078 

1079 root = "butlerRoot/" 

1080 """Root repository directory expected to be used in case useTempRoot=False. 

1081 Otherwise the root is set to a 20 characters long randomly generated string 

1082 during set-up. 

1083 """ 

1084 

1085 datastoreStr = [f"datastore={root}"] 

1086 """Contains all expected root locations in a format expected to be 

1087 returned by Butler stringification. 

1088 """ 

1089 

1090 datastoreName = ["S3Datastore@s3://{bucketName}/{root}"] 

1091 """The expected format of the S3Datastore string.""" 

1092 

1093 registryStr = ":memory:" 

1094 """Expected format of the Registry string.""" 

1095 

1096 def genRoot(self): 

1097 """Returns a random string of len 20 to serve as a root 

1098 name for the temporary bucket repo. 

1099 

1100 This is equivalent to tempfile.mkdtemp as this is what self.root 

1101 becomes when useTempRoot is True. 

1102 """ 

1103 rndstr = "".join( 

1104 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1105 ) 

1106 return rndstr + "/" 

1107 

1108 def setUp(self): 

1109 config = Config(self.configFile) 

1110 uri = ButlerURI(config[".datastore.datastore.root"]) 

1111 self.bucketName = uri.netloc 

1112 

1113 # set up some fake credentials if they do not exist 

1114 self.usingDummyCredentials = setAwsEnvCredentials() 

1115 

1116 if self.useTempRoot: 

1117 self.root = self.genRoot() 

1118 rooturi = f"s3://{self.bucketName}/{self.root}" 

1119 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1120 

1121 # MOTO needs to know that we expect Bucket bucketname to exist 

1122 # (this used to be the class attribute bucketName) 

1123 s3 = boto3.resource("s3") 

1124 s3.create_bucket(Bucket=self.bucketName) 

1125 

1126 self.datastoreStr = f"datastore={self.root}" 

1127 self.datastoreName = [f"S3Datastore@{rooturi}"] 

1128 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1129 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1130 

1131 def tearDown(self): 

1132 s3 = boto3.resource("s3") 

1133 bucket = s3.Bucket(self.bucketName) 

1134 try: 

1135 bucket.objects.all().delete() 

1136 except botocore.exceptions.ClientError as e: 

1137 if e.response["Error"]["Code"] == "404": 

1138 # the key was not reachable - pass 

1139 pass 

1140 else: 

1141 raise 

1142 

1143 bucket = s3.Bucket(self.bucketName) 

1144 bucket.delete() 

1145 

1146 # unset any potentially set dummy credentials 

1147 if self.usingDummyCredentials: 

1148 unsetAwsEnvCredentials() 

1149 

1150 def checkFileExists(self, root, relpath): 

1151 """Checks if file exists at a given path (relative to root). 

1152 

1153 Test testPutTemplates verifies actual physical existance of the files 

1154 in the requested location. For S3Datastore this test is equivalent to 

1155 `lsst.daf.butler.core.s3utils.s3checkFileExists` call. 

1156 """ 

1157 uri = ButlerURI(root) 

1158 uri.updateFile(relpath) 

1159 return s3CheckFileExists(uri)[0] 

1160 

1161 @unittest.expectedFailure 

1162 def testImportExport(self): 

1163 super().testImportExport() 

1164 

1165 

1166if __name__ == "__main__": 1166 ↛ 1167line 1166 didn't jump to line 1167, because the condition on line 1166 was never true

1167 unittest.main()