# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

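# boto3 and moto are optional test dependencies: if moto is unavailable,
# mock_s3 degrades to a no-op decorator so the S3 test case below can still
# be defined (it is skipped entirely via skipIf when boto3 is absent).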
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported.
        """
        return cls

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


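# Construct a MetricsExample (from lsst.daf.butler.tests) holding a summary
# dict, an output dict, and a data list of arbitrary test values; the tests
# below compare these attributes after round-tripping through the butler.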
def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by other test
    cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

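        # The override directory supplies a config that changes the datastore
        # records table name; the key comparison below checks for it.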
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the butler registers the run and tag collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

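        # A butler constructed from an existing butler shares its registry
        # and datastore, as the assertIs checks below confirm.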
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

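        # transfer="copy" copies the ingested files into the datastore,
        # leaving the source files in place.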
        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at the given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        files in the requested location. For PosixDatastore this check is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

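        # Note: np.int64 (rather than a plain int) appears to be used here to
        # exercise non-native integer types in data IDs.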
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

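    # Export of a disassembled (virtual) composite is not expected to
    # round-trip yet, hence the expectedFailure marker below.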
    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config directory given to makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config URI given to makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at the given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        files in the requested location. For S3Datastore this check is
        equivalent to an `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()