# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
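

# The put/get round trip exercised by the mixin below is, in rough sketch
# (an illustration only, not code that runs in this module):
#
#     butler = Butler(configFile, run="ingest")
#     ref = butler.put(inMemoryObject, "dataset_type_name", dataId)
#     assert butler.get(ref) == inMemoryObject
#
# Concrete test cases run variants of this against different datastore
# configurations.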

class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests with different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
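
    # A rough summary of the deletion API exercised below (see the
    # Butler.pruneDatasets docstring for the authoritative behaviour):
    #   - pruneDatasets([ref]) disassociates the dataset from the tagged
    #     collection only; the registry and datastore still know about it.
    #   - pruneDatasets([ref], unstore=True) also deletes the artifact from
    #     the datastore.
    #   - pruneDatasets([ref], purge=True, unstore=True) removes every trace,
    #     including the registry entry.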

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the butler registers the run and tag collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved.
            # ref.components will only be populated in certain cases.
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            self.assertTrue(compRef.hasParentId)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
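

# Concrete subclasses of ButlerTests are expected to define configFile,
# fullConfigKey, validationCanFail, datastoreStr, datastoreName and
# registryStr as class attributes; see the test cases towards the bottom of
# this file for examples.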

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
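
    # The ingest test below registers externally created files with the
    # butler.  A FileDataset pairs an on-disk path with one or more
    # DatasetRefs, and transfer="copy" asks the datastore to copy the file
    # into the repository (other transfer modes such as "move" and "symlink"
    # also exist; see Butler.ingest for the full list).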

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
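
    # Background for the next test (a summary, not normative): RUN
    # collections own their datasets, TAGGED collections merely associate
    # existing datasets, and CHAINED collections define a search order over
    # other collections.  pruneCollection accepts different arguments
    # depending on the collection type, which is what is checked here.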

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are created
        # for each component.  We need entries for each component in the test
        # configuration, otherwise validation won't work.  The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])
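
    # Butler.transaction() provides an all-or-nothing context: if the block
    # raises, registry inserts and datastore writes are both rolled back.
    # The pattern being verified is, schematically:
    #
    #     with butler.transaction():
    #         butler.put(metric, datasetTypeName, dataId)
    #         raise SomeError  # nothing written above survives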

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly that the Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For PosixDatastore this test is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))
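
    # File templates map a dataset's run, dataset type and data ID to a path
    # inside the datastore root; the default template used below yields paths
    # like "ingest/metric1/d-r/DummyCamComp_423.pickle".  The template
    # definitions themselves live in the datastore configuration, not in this
    # test file.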

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three DatasetTypes; the first two are almost identical and
        # will use the default template, while metric3 is configured with a
        # template that does not produce unique filenames.
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); this should not change the template (at least the way
        # we're defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)
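
    # The export/import round trip below writes a YAML description of the
    # selected datasets, creates a fresh repository, and re-imports the
    # datasets into it (here via symlinked files), after which every original
    # ref should resolve in the new butler.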

    def runImportExportTest(self, storageClass):
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add
            # tests for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command-line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # In-memory datastores cannot ingest files, so skip the inherited
        # ingest test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know the
        # file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore plus
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"
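
    # The mock_s3 class decorator reroutes boto3/botocore calls to moto's
    # in-process S3 fake, so setUp can create buckets and the tests never
    # touch real AWS.  setAwsEnvCredentials supplies dummy credentials when
    # none are configured in the environment.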

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For S3Datastore this test is
        equivalent to an `lsst.daf.butler.core.s3utils.s3CheckFileExists`
        call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()