Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33 

34try: 

35 import boto3 

36 import botocore 

37 from moto import mock_s3 

38except ImportError: 

39 boto3 = None 

40 

41 def mock_s3(cls): 

42 """A no-op decorator in case moto mock_s3 can not be imported. 

43 """ 

44 return cls 

45 

46import astropy.time 

47from lsst.utils import doImport 

48from lsst.daf.butler.core.utils import safeMakeDir 

49from lsst.daf.butler import Butler, Config, ButlerConfig 

50from lsst.daf.butler import StorageClassFactory 

51from lsst.daf.butler import DatasetType, DatasetRef 

52from lsst.daf.butler import FileTemplateValidationError, ValidationError 

53from lsst.daf.butler import FileDataset 

54from lsst.daf.butler import CollectionSearch, CollectionType 

55from lsst.daf.butler import ButlerURI 

56from lsst.daf.butler import script 

57from lsst.daf.butler.registry import MissingCollectionError 

58from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

59from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

60 unsetAwsEnvCredentials) 

61 

62from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

63 

64TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

65 

66 

67def makeExampleMetrics(): 

68 return MetricsExample({"AM1": 5.2, "AM2": 30.6}, 

69 {"a": [1, 2, 3], 

70 "b": {"blue": 5, "red": "green"}}, 

71 [563, 234, 456.7, 752, 8, 9, 27] 

72 ) 

73 

74 

75class TransactionTestError(Exception): 

76 """Specific error for testing transactions, to prevent misdiagnosing 

77 that might otherwise occur when a standard exception is used. 

78 """ 

79 pass 

80 

81 

82class ButlerConfigTests(unittest.TestCase): 

83 """Simple tests for ButlerConfig that are not tested in other test cases. 

84 """ 

85 

86 def testSearchPath(self): 

87 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

88 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

89 config1 = ButlerConfig(configFile) 

90 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

91 

92 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

93 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

94 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

95 self.assertIn("testConfigs", "\n".join(cm.output)) 

96 

97 key = ("datastore", "records", "table") 

98 self.assertNotEqual(config1[key], config2[key]) 

99 self.assertEqual(config2[key], "override_record") 

100 

101 

102class ButlerPutGetTests: 

103 """Helper method for running a suite of put/get tests from different 

104 butler configurations.""" 

105 

106 root = None 

107 

108 @staticmethod 

109 def addDatasetType(datasetTypeName, dimensions, storageClass, registry): 

110 """Create a DatasetType and register it 

111 """ 

112 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

113 registry.registerDatasetType(datasetType) 

114 return datasetType 

115 

116 @classmethod 

117 def setUpClass(cls): 

118 cls.storageClassFactory = StorageClassFactory() 

119 cls.storageClassFactory.addFromConfig(cls.configFile) 

120 

121 def assertGetComponents(self, butler, datasetRef, components, reference): 

122 datasetType = datasetRef.datasetType 

123 dataId = datasetRef.dataId 

124 for component in components: 

125 compTypeName = datasetType.componentTypeName(component) 

126 result = butler.get(compTypeName, dataId) 

127 self.assertEqual(result, getattr(reference, component)) 

128 

129 def tearDown(self): 

130 if self.root is not None and os.path.exists(self.root): 

131 shutil.rmtree(self.root, ignore_errors=True) 

132 

133 def runPutGetTest(self, storageClass, datasetTypeName): 

134 # New datasets will be added to run and tag, but we will only look in 

135 # tag when looking up datasets. 

136 run = "ingest/run" 

137 tag = "ingest" 

138 butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag]) 

139 

140 # There will not be a collection yet 

141 collections = set(butler.registry.queryCollections()) 

142 self.assertEqual(collections, set([run, tag])) 

143 

144 # Create and register a DatasetType 

145 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

146 

147 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

148 

149 # Try to create one that will have a name that is too long 

150 with self.assertRaises(Exception) as cm: 

151 self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry) 

152 self.assertIn("check constraint", str(cm.exception).lower()) 

153 

154 # Add needed Dimensions 

155 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

156 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

157 "name": "d-r", 

158 "abstract_filter": "R"}) 

159 butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp", 

160 "id": 1, 

161 "name": "default"}) 

162 visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai") 

163 visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai") 

164 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

165 "name": "fourtwentythree", "physical_filter": "d-r", 

166 "visit_system": 1, "datetime_begin": visit_start, 

167 "datetime_end": visit_end}) 

168 

169 # Add a second visit for some later tests 

170 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424, 

171 "name": "fourtwentyfour", "physical_filter": "d-r", 

172 "visit_system": 1}) 

173 

174 # Create and store a dataset 

175 metric = makeExampleMetrics() 

176 dataId = {"instrument": "DummyCamComp", "visit": 423} 

177 

178 # Create a DatasetRef for put 

179 refIn = DatasetRef(datasetType, dataId, id=None) 

180 

181 # Put with a preexisting id should fail 

182 with self.assertRaises(ValueError): 

183 butler.put(metric, DatasetRef(datasetType, dataId, id=100)) 

184 

185 # Put and remove the dataset once as a DatasetRef, once as a dataId, 

186 # and once with a DatasetType 

187 for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)): 

188 with self.subTest(args=args): 

189 ref = butler.put(metric, *args) 

190 self.assertIsInstance(ref, DatasetRef) 

191 

192 # Test getDirect 

193 metricOut = butler.getDirect(ref) 

194 self.assertEqual(metric, metricOut) 

195 # Test get 

196 metricOut = butler.get(ref.datasetType.name, dataId) 

197 self.assertEqual(metric, metricOut) 

198 # Test get with a datasetRef 

199 metricOut = butler.get(ref) 

200 self.assertEqual(metric, metricOut) 

201 # Test getDeferred with dataId 

202 metricOut = butler.getDeferred(ref.datasetType.name, dataId).get() 

203 self.assertEqual(metric, metricOut) 

204 # Test getDeferred with a datasetRef 

205 metricOut = butler.getDeferred(ref).get() 

206 self.assertEqual(metric, metricOut) 

207 

208 # Check we can get components 

209 if storageClass.isComposite(): 

210 self.assertGetComponents(butler, ref, 

211 ("summary", "data", "output"), metric) 

212 

213 # Remove from the tagged collection only; after that we 

214 # shouldn't be able to find it unless we use the dataset_id. 

215 butler.pruneDatasets([ref]) 

216 with self.assertRaises(LookupError): 

217 butler.datasetExists(*args) 

218 # Registry still knows about it, if we use the dataset_id. 

219 self.assertEqual(butler.registry.getDataset(ref.id), ref) 

220 # If we use the output ref with the dataset_id, we should 

221 # still be able to load it with getDirect(). 

222 self.assertEqual(metric, butler.getDirect(ref)) 

223 

224 # Reinsert into collection, then delete from Datastore *and* 

225 # remove from collection. 

226 butler.registry.associate(tag, [ref]) 

227 butler.pruneDatasets([ref], unstore=True) 

228 # Lookup with original args should still fail. 

229 with self.assertRaises(LookupError): 

230 butler.datasetExists(*args) 

231 # Now getDirect() should fail, too. 

232 with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"): 

233 butler.getDirect(ref) 

234 # Registry still knows about it, if we use the dataset_id. 

235 self.assertEqual(butler.registry.getDataset(ref.id), ref) 

236 

237 # Now remove the dataset completely. 

238 butler.pruneDatasets([ref], purge=True, unstore=True) 

239 # Lookup with original args should still fail. 

240 with self.assertRaises(LookupError): 

241 butler.datasetExists(*args) 

242 # getDirect() should still fail. 

243 with self.assertRaises(FileNotFoundError): 

244 butler.getDirect(ref) 

245 # Registry shouldn't be able to find it by dataset_id anymore. 

246 self.assertIsNone(butler.registry.getDataset(ref.id)) 

247 

248 # Put the dataset again, since the last thing we did was remove it. 

249 ref = butler.put(metric, refIn) 

250 

251 # Get with parameters 

252 stop = 4 

253 sliced = butler.get(ref, parameters={"slice": slice(stop)}) 

254 self.assertNotEqual(metric, sliced) 

255 self.assertEqual(metric.summary, sliced.summary) 

256 self.assertEqual(metric.output, sliced.output) 

257 self.assertEqual(metric.data[:stop], sliced.data) 

258 # getDeferred with parameters 

259 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get() 

260 self.assertNotEqual(metric, sliced) 

261 self.assertEqual(metric.summary, sliced.summary) 

262 self.assertEqual(metric.output, sliced.output) 

263 self.assertEqual(metric.data[:stop], sliced.data) 

264 # getDeferred with deferred parameters 

265 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)}) 

266 self.assertNotEqual(metric, sliced) 

267 self.assertEqual(metric.summary, sliced.summary) 

268 self.assertEqual(metric.output, sliced.output) 

269 self.assertEqual(metric.data[:stop], sliced.data) 

270 

271 if storageClass.isComposite(): 

272 # Check that components can be retrieved 

273 metricOut = butler.get(ref.datasetType.name, dataId) 

274 compNameS = ref.datasetType.componentTypeName("summary") 

275 compNameD = ref.datasetType.componentTypeName("data") 

276 summary = butler.get(compNameS, dataId) 

277 self.assertEqual(summary, metric.summary) 

278 data = butler.get(compNameD, dataId) 

279 self.assertEqual(data, metric.data) 

280 

281 if "counter" in storageClass.readComponents: 

282 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId) 

283 self.assertEqual(count, len(data)) 

284 

285 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId, 

286 parameters={"slice": slice(stop)}) 

287 self.assertEqual(count, stop) 

288 

289 compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections) 

290 summary = butler.getDirect(compRef) 

291 self.assertEqual(summary, metric.summary) 

292 

293 # Create a Dataset type that has the same name but is inconsistent. 

294 inconsistentDatasetType = DatasetType(datasetTypeName, dimensions, 

295 self.storageClassFactory.getStorageClass("Config")) 

296 

297 # Getting with a dataset type that does not match registry fails 

298 with self.assertRaises(ValueError): 

299 butler.get(inconsistentDatasetType, dataId) 

300 

301 # Combining a DatasetRef with a dataId should fail 

302 with self.assertRaises(ValueError): 

303 butler.get(ref, dataId) 

304 # Getting with an explicit ref should fail if the id doesn't match 

305 with self.assertRaises(ValueError): 

306 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101)) 

307 

308 # Getting a dataset with unknown parameters should fail 

309 with self.assertRaises(KeyError): 

310 butler.get(ref, parameters={"unsupported": True}) 

311 

312 # Check we have a collection 

313 collections = set(butler.registry.queryCollections()) 

314 self.assertEqual(collections, {run, tag}) 

315 

316 # Clean up to check that we can remove something that may have 

317 # already had a component removed 

318 butler.pruneDatasets([ref], unstore=True, purge=True) 

319 

320 # Add a dataset back in since some downstream tests require 

321 # something to be present 

322 ref = butler.put(metric, refIn) 

323 

324 return butler 

325 

326 def testDeferredCollectionPassing(self): 

327 # Construct a butler with no run or collection, but make it writeable. 

328 butler = Butler(self.tmpConfigFile, writeable=True) 

329 # Create and register a DatasetType 

330 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

331 datasetType = self.addDatasetType("example", dimensions, 

332 self.storageClassFactory.getStorageClass("StructuredData"), 

333 butler.registry) 

334 # Add needed Dimensions 

335 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

336 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

337 "name": "d-r", 

338 "abstract_filter": "R"}) 

339 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

340 "name": "fourtwentythree", "physical_filter": "d-r"}) 

341 dataId = {"instrument": "DummyCamComp", "visit": 423} 

342 # Create dataset. 

343 metric = makeExampleMetrics() 

344 # Register a new run and put dataset. 

345 run = "deferred" 

346 butler.registry.registerRun(run) 

347 ref = butler.put(metric, datasetType, dataId, run=run) 

348 # Putting with no run should fail with TypeError. 

349 with self.assertRaises(TypeError): 

350 butler.put(metric, datasetType, dataId) 

351 # Dataset should exist. 

352 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

353 # We should be able to get the dataset back, but with and without 

354 # a deferred dataset handle. 

355 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run])) 

356 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get()) 

357 # Trying to find the dataset without any collection is a TypeError. 

358 with self.assertRaises(TypeError): 

359 butler.datasetExists(datasetType, dataId) 

360 with self.assertRaises(TypeError): 

361 butler.get(datasetType, dataId) 

362 # Associate the dataset with a different collection. 

363 butler.registry.registerCollection("tagged") 

364 butler.registry.associate("tagged", [ref]) 

365 # Deleting the dataset from the new collection should make it findable 

366 # in the original collection. 

367 butler.pruneDatasets([ref], tags=["tagged"]) 

368 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

369 

370 

371class ButlerTests(ButlerPutGetTests): 

372 """Tests for Butler. 

373 """ 

374 useTempRoot = True 

375 

376 def setUp(self): 

377 """Create a new butler root for each test.""" 

378 if self.useTempRoot: 

379 self.root = tempfile.mkdtemp(dir=TESTDIR) 

380 Butler.makeRepo(self.root, config=Config(self.configFile)) 

381 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

382 else: 

383 self.root = None 

384 self.tmpConfigFile = self.configFile 

385 

386 def testConstructor(self): 

387 """Independent test of constructor. 

388 """ 

389 butler = Butler(self.tmpConfigFile, run="ingest") 

390 self.assertIsInstance(butler, Butler) 

391 

392 collections = set(butler.registry.queryCollections()) 

393 self.assertEqual(collections, {"ingest"}) 

394 

395 butler2 = Butler(butler=butler, collections=["other"]) 

396 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"])) 

397 self.assertIsNone(butler2.run) 

398 self.assertIs(butler.registry, butler2.registry) 

399 self.assertIs(butler.datastore, butler2.datastore) 

400 

401 def testBasicPutGet(self): 

402 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

403 self.runPutGetTest(storageClass, "test_metric") 

404 

405 def testCompositePutGetConcrete(self): 

406 

407 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly") 

408 butler = self.runPutGetTest(storageClass, "test_metric") 

409 

410 # Should *not* be disassembled 

411 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

412 self.assertEqual(len(datasets), 1) 

413 uri, components = butler.getURIs(datasets[0]) 

414 self.assertIsInstance(uri, ButlerURI) 

415 self.assertFalse(components) 

416 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

417 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

418 

419 # Predicted dataset 

420 dataId = {"instrument": "DummyCamComp", "visit": 424} 

421 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

422 self.assertFalse(components) 

423 self.assertIsInstance(uri, ButlerURI) 

424 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

425 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

426 

427 def testCompositePutGetVirtual(self): 

428 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

429 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

430 

431 # Should be disassembled 

432 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

433 self.assertEqual(len(datasets), 1) 

434 uri, components = butler.getURIs(datasets[0]) 

435 

436 if butler.datastore.isEphemeral: 

437 # Never disassemble in-memory datastore 

438 self.assertIsInstance(uri, ButlerURI) 

439 self.assertFalse(components) 

440 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

441 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

442 else: 

443 self.assertIsNone(uri) 

444 self.assertEqual(set(components), set(storageClass.components)) 

445 for compuri in components.values(): 

446 self.assertIsInstance(compuri, ButlerURI) 

447 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

448 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

449 

450 # Predicted dataset 

451 dataId = {"instrument": "DummyCamComp", "visit": 424} 

452 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

453 

454 if butler.datastore.isEphemeral: 

455 # Never disassembled 

456 self.assertIsInstance(uri, ButlerURI) 

457 self.assertFalse(components) 

458 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

459 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

460 else: 

461 self.assertIsNone(uri) 

462 self.assertEqual(set(components), set(storageClass.components)) 

463 for compuri in components.values(): 

464 self.assertIsInstance(compuri, ButlerURI) 

465 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

466 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

467 

468 def testIngest(self): 

469 butler = Butler(self.tmpConfigFile, run="ingest") 

470 

471 # Create and register a DatasetType 

472 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"]) 

473 

474 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

475 datasetTypeName = "metric" 

476 

477 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

478 

479 # Add needed Dimensions 

480 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

481 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

482 "name": "d-r", 

483 "abstract_filter": "R"}) 

484 for detector in (1, 2): 

485 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector, 

486 "full_name": f"detector{detector}"}) 

487 

488 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

489 "name": "fourtwentythree", "physical_filter": "d-r"}, 

490 {"instrument": "DummyCamComp", "id": 424, 

491 "name": "fourtwentyfour", "physical_filter": "d-r"}) 

492 

493 formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter") 

494 dataRoot = os.path.join(TESTDIR, "data", "basic") 

495 datasets = [] 

496 for detector in (1, 2): 

497 detector_name = f"detector_{detector}" 

498 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

499 dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

500 # Create a DatasetRef for ingest 

501 refIn = DatasetRef(datasetType, dataId, id=None) 

502 

503 datasets.append(FileDataset(path=metricFile, 

504 refs=[refIn], 

505 formatter=formatter)) 

506 

507 butler.ingest(*datasets, transfer="copy") 

508 

509 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

510 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

511 

512 metrics1 = butler.get(datasetTypeName, dataId1) 

513 metrics2 = butler.get(datasetTypeName, dataId2) 

514 self.assertNotEqual(metrics1, metrics2) 

515 

516 # Compare URIs 

517 uri1 = butler.getURI(datasetTypeName, dataId1) 

518 uri2 = butler.getURI(datasetTypeName, dataId2) 

519 self.assertNotEqual(uri1, uri2) 

520 

521 # Now do a multi-dataset but single file ingest 

522 metricFile = os.path.join(dataRoot, "detectors.yaml") 

523 refs = [] 

524 for detector in (1, 2): 

525 detector_name = f"detector_{detector}" 

526 dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

527 # Create a DatasetRef for ingest 

528 refs.append(DatasetRef(datasetType, dataId, id=None)) 

529 

530 datasets = [] 

531 datasets.append(FileDataset(path=metricFile, 

532 refs=refs, 

533 formatter=MultiDetectorFormatter)) 

534 

535 butler.ingest(*datasets, transfer="copy") 

536 

537 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

538 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

539 

540 multi1 = butler.get(datasetTypeName, dataId1) 

541 multi2 = butler.get(datasetTypeName, dataId2) 

542 

543 self.assertEqual(multi1, metrics1) 

544 self.assertEqual(multi2, metrics2) 

545 

546 # Compare URIs 

547 uri1 = butler.getURI(datasetTypeName, dataId1) 

548 uri2 = butler.getURI(datasetTypeName, dataId2) 

549 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

550 

551 # Test that removing one does not break the second 

552 # This line will issue a warning log message for a ChainedDatastore 

553 # that uses an InMemoryDatastore since in-memory can not ingest 

554 # files. 

555 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

556 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

557 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

558 multi2b = butler.get(datasetTypeName, dataId2) 

559 self.assertEqual(multi2, multi2b) 

560 

561 def testPruneCollections(self): 

562 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

563 butler = Butler(self.tmpConfigFile, writeable=True) 

564 # Load registry data with dimensions to hang datasets off of. 

565 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

566 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

567 # Add some RUN-type collections. 

568 run1 = "run1" 

569 butler.registry.registerRun(run1) 

570 run2 = "run2" 

571 butler.registry.registerRun(run2) 

572 # put some datasets. ref1 and ref2 have the same data ID, and are in 

573 # different runs. ref3 has a different data ID. 

574 metric = makeExampleMetrics() 

575 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

576 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

577 butler.registry) 

578 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

579 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

580 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

581 # Try to delete a RUN collection without purge, or with purge and not 

582 # unstore. 

583 with self.assertRaises(TypeError): 

584 butler.pruneCollection(run1) 

585 with self.assertRaises(TypeError): 

586 butler.pruneCollection(run2, purge=True) 

587 # Add a TAGGED collection and associate ref3 only into it. 

588 tag1 = "tag1" 

589 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

590 butler.registry.associate(tag1, [ref3]) 

591 # Add a CHAINED collection that searches run1 and then run2. It 

592 # logically contains only ref1, because ref2 is shadowed due to them 

593 # having the same data ID and dataset type. 

594 chain1 = "chain1" 

595 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

596 butler.registry.setCollectionChain(chain1, [run1, run2]) 

597 # Try to delete RUN collections, which should fail with complete 

598 # rollback because they're still referenced by the CHAINED 

599 # collection. 

600 with self.assertRaises(Exception): 

601 butler.pruneCollection(run1, pruge=True, unstore=True) 

602 with self.assertRaises(Exception): 

603 butler.pruneCollection(run2, pruge=True, unstore=True) 

604 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

605 [ref1, ref2, ref3]) 

606 self.assertTrue(butler.datastore.exists(ref1)) 

607 self.assertTrue(butler.datastore.exists(ref2)) 

608 self.assertTrue(butler.datastore.exists(ref3)) 

609 # Try to delete CHAINED and TAGGED collections with purge; should not 

610 # work. 

611 with self.assertRaises(TypeError): 

612 butler.pruneCollection(tag1, purge=True, unstore=True) 

613 with self.assertRaises(TypeError): 

614 butler.pruneCollection(chain1, purge=True, unstore=True) 

615 # Remove the tagged collection with unstore=False. This should not 

616 # affect the datasets. 

617 butler.pruneCollection(tag1) 

618 with self.assertRaises(MissingCollectionError): 

619 butler.registry.getCollectionType(tag1) 

620 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

621 [ref1, ref2, ref3]) 

622 self.assertTrue(butler.datastore.exists(ref1)) 

623 self.assertTrue(butler.datastore.exists(ref2)) 

624 self.assertTrue(butler.datastore.exists(ref3)) 

625 # Add the tagged collection back in, and remove it with unstore=True. 

626 # This should remove ref3 only from the datastore. 

627 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

628 butler.registry.associate(tag1, [ref3]) 

629 butler.pruneCollection(tag1, unstore=True) 

630 with self.assertRaises(MissingCollectionError): 

631 butler.registry.getCollectionType(tag1) 

632 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

633 [ref1, ref2, ref3]) 

634 self.assertTrue(butler.datastore.exists(ref1)) 

635 self.assertTrue(butler.datastore.exists(ref2)) 

636 self.assertFalse(butler.datastore.exists(ref3)) 

637 # Delete the chain with unstore=False. The datasets should not be 

638 # affected at all. 

639 butler.pruneCollection(chain1) 

640 with self.assertRaises(MissingCollectionError): 

641 butler.registry.getCollectionType(chain1) 

642 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

643 [ref1, ref2, ref3]) 

644 self.assertTrue(butler.datastore.exists(ref1)) 

645 self.assertTrue(butler.datastore.exists(ref2)) 

646 self.assertFalse(butler.datastore.exists(ref3)) 

647 # Redefine and then delete the chain with unstore=True. Only ref1 

648 # should be unstored (ref3 has already been unstored, but otherwise 

649 # would be now). 

650 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

651 butler.registry.setCollectionChain(chain1, [run1, run2]) 

652 butler.pruneCollection(chain1, unstore=True) 

653 with self.assertRaises(MissingCollectionError): 

654 butler.registry.getCollectionType(chain1) 

655 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

656 [ref1, ref2, ref3]) 

657 self.assertFalse(butler.datastore.exists(ref1)) 

658 self.assertTrue(butler.datastore.exists(ref2)) 

659 self.assertFalse(butler.datastore.exists(ref3)) 

660 # Remove run1. This removes ref1 and ref3 from the registry (they're 

661 # already gone from the datastore, which is fine). 

662 butler.pruneCollection(run1, purge=True, unstore=True) 

663 with self.assertRaises(MissingCollectionError): 

664 butler.registry.getCollectionType(run1) 

665 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

666 [ref2]) 

667 self.assertTrue(butler.datastore.exists(ref2)) 

668 # Remove run2. This removes ref2 from the registry and the datastore. 

669 butler.pruneCollection(run2, purge=True, unstore=True) 

670 with self.assertRaises(MissingCollectionError): 

671 butler.registry.getCollectionType(run2) 

672 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

673 []) 

674 

675 def testPickle(self): 

676 """Test pickle support. 

677 """ 

678 butler = Butler(self.tmpConfigFile, run="ingest") 

679 butlerOut = pickle.loads(pickle.dumps(butler)) 

680 self.assertIsInstance(butlerOut, Butler) 

681 self.assertEqual(butlerOut._config, butler._config) 

682 self.assertEqual(butlerOut.collections, butler.collections) 

683 self.assertEqual(butlerOut.run, butler.run) 

684 

685 def testGetDatasetTypes(self): 

686 butler = Butler(self.tmpConfigFile, run="ingest") 

687 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"]) 

688 dimensionEntries = [ 

689 ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"}, 

690 {"instrument": "DummyCamComp"}), 

691 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}), 

692 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}) 

693 ] 

694 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

695 # Add needed Dimensions 

696 for args in dimensionEntries: 

697 butler.registry.insertDimensionData(*args) 

698 

699 # When a DatasetType is added to the registry entries are not created 

700 # for components but querying them can return the components. 

701 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

702 components = set() 

703 for datasetTypeName in datasetTypeNames: 

704 # Create and register a DatasetType 

705 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

706 

707 for componentName in storageClass.components: 

708 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

709 

710 fromRegistry = set(butler.registry.queryDatasetTypes(components=True)) 

711 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

712 

713 # Now that we have some dataset types registered, validate them 

714 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

715 "datasetType.component"]) 

716 

717 # Add a new datasetType that will fail template validation 

718 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

719 if self.validationCanFail: 

720 with self.assertRaises(ValidationError): 

721 butler.validateConfiguration() 

722 

723 # Rerun validation but with a subset of dataset type names 

724 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

725 

726 # Rerun validation but ignore the bad datasetType 

727 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

728 "datasetType.component"]) 

729 

730 def testTransaction(self): 

731 butler = Butler(self.tmpConfigFile, run="ingest") 

732 datasetTypeName = "test_metric" 

733 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

734 dimensionEntries = (("instrument", {"instrument": "DummyCam"}), 

735 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", 

736 "abstract_filter": "R"}), 

737 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", 

738 "physical_filter": "d-r"})) 

739 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

740 metric = makeExampleMetrics() 

741 dataId = {"instrument": "DummyCam", "visit": 42} 

742 # Create and register a DatasetType 

743 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

744 with self.assertRaises(TransactionTestError): 

745 with butler.transaction(): 

746 # Add needed Dimensions 

747 for args in dimensionEntries: 

748 butler.registry.insertDimensionData(*args) 

749 # Store a dataset 

750 ref = butler.put(metric, datasetTypeName, dataId) 

751 self.assertIsInstance(ref, DatasetRef) 

752 # Test getDirect 

753 metricOut = butler.getDirect(ref) 

754 self.assertEqual(metric, metricOut) 

755 # Test get 

756 metricOut = butler.get(datasetTypeName, dataId) 

757 self.assertEqual(metric, metricOut) 

758 # Check we can get components 

759 self.assertGetComponents(butler, ref, 

760 ("summary", "data", "output"), metric) 

761 raise TransactionTestError("This should roll back the entire transaction") 

762 with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"): 

763 butler.registry.expandDataId(dataId) 

764 # Should raise LookupError for missing data ID value 

765 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

766 butler.get(datasetTypeName, dataId) 

767 # Also check explicitly if Dataset entry is missing 

768 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

769 # Direct retrieval should not find the file in the Datastore 

770 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

771 butler.getDirect(ref) 

772 

773 def testMakeRepo(self): 

774 """Test that we can write butler configuration to a new repository via 

775 the Butler.makeRepo interface and then instantiate a butler from the 

776 repo root. 

777 """ 

778 # Do not run the test if we know this datastore configuration does 

779 # not support a file system root 

780 if self.fullConfigKey is None: 

781 return 

782 

783 # Remove the file created in setUp 

784 os.unlink(self.tmpConfigFile) 

785 

786 createRegistry = not self.useTempRoot 

787 butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile), 

788 createRegistry=createRegistry) 

789 limited = Config(self.configFile) 

790 butler1 = Butler(butlerConfig) 

791 butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False, 

792 config=Config(self.configFile), overwrite=True) 

793 full = Config(self.tmpConfigFile) 

794 butler2 = Butler(butlerConfig) 

795 # Butlers should have the same configuration regardless of whether 

796 # defaults were expanded. 

797 self.assertEqual(butler1._config, butler2._config) 

798 # Config files loaded directly should not be the same. 

799 self.assertNotEqual(limited, full) 

800 # Make sure "limited" doesn't have a few keys we know it should be 

801 # inheriting from defaults. 

802 self.assertIn(self.fullConfigKey, full) 

803 self.assertNotIn(self.fullConfigKey, limited) 

804 

805 # Collections don't appear until something is put in them 

806 collections1 = set(butler1.registry.queryCollections()) 

807 self.assertEqual(collections1, set()) 

808 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

809 

810 # Check that a config with no associated file name will not 

811 # work properly with relocatable Butler repo 

812 butlerConfig.configFile = None 

813 with self.assertRaises(ValueError): 

814 Butler(butlerConfig) 

815 

816 with self.assertRaises(FileExistsError): 

817 Butler.makeRepo(self.root, standalone=True, createRegistry=False, 

818 config=Config(self.configFile), overwrite=False) 

819 

820 def testStringification(self): 

821 butler = Butler(self.tmpConfigFile, run="ingest") 

822 butlerStr = str(butler) 

823 

824 if self.datastoreStr is not None: 

825 for testStr in self.datastoreStr: 

826 self.assertIn(testStr, butlerStr) 

827 if self.registryStr is not None: 

828 self.assertIn(self.registryStr, butlerStr) 

829 

830 datastoreName = butler.datastore.name 

831 if self.datastoreName is not None: 

832 for testStr in self.datastoreName: 

833 self.assertIn(testStr, datastoreName) 

834 

835 

836class FileLikeDatastoreButlerTests(ButlerTests): 

837 """Common tests and specialization of ButlerTests for butlers backed 

838 by datastores that inherit from FileLikeDatastore. 

839 """ 

840 

841 def checkFileExists(self, root, path): 

842 """Checks if file exists at a given path (relative to root). 

843 

844 Test testPutTemplates verifies actual physical existance of the files 

845 in the requested location. For POSIXDatastore this test is equivalent 

846 to `os.path.exists` call. 

847 """ 

848 return os.path.exists(os.path.join(root, path)) 

849 

850 def testPutTemplates(self): 

851 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

852 butler = Butler(self.tmpConfigFile, run="ingest") 

853 

854 # Add needed Dimensions 

855 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

856 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

857 "name": "d-r", 

858 "abstract_filter": "R"}) 

859 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", 

860 "physical_filter": "d-r"}) 

861 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", 

862 "physical_filter": "d-r"}) 

863 

864 # Create and store a dataset 

865 metric = makeExampleMetrics() 

866 

867 # Create two almost-identical DatasetTypes (both will use default 

868 # template) 

869 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

870 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

871 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

872 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

873 

874 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

875 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

876 dataId3 = {"instrument": "DummyCamComp", "visit": 425} 

877 

878 # Put with exactly the data ID keys needed 

879 ref = butler.put(metric, "metric1", dataId1) 

880 self.assertTrue(self.checkFileExists(butler.datastore.root, 

881 "ingest/metric1/d-r/DummyCamComp_423.pickle")) 

882 

883 # Check the template based on dimensions 

884 butler.datastore.templates.validateTemplates([ref]) 

885 

886 # Put with extra data ID keys (physical_filter is an optional 

887 # dependency); should not change template (at least the way we're 

888 # defining them to behave now; the important thing is that they 

889 # must be consistent). 

890 ref = butler.put(metric, "metric2", dataId2) 

891 self.assertTrue(self.checkFileExists(butler.datastore.root, 

892 "ingest/metric2/d-r/DummyCamComp_v423.pickle")) 

893 

894 # Check the template based on dimensions 

895 butler.datastore.templates.validateTemplates([ref]) 

896 

897 # Now use a file template that will not result in unique filenames 

898 ref = butler.put(metric, "metric3", dataId1) 

899 

900 # Check the template based on dimensions. This one is a bad template 

901 with self.assertRaises(FileTemplateValidationError): 

902 butler.datastore.templates.validateTemplates([ref]) 

903 

904 with self.assertRaises(FileExistsError): 

905 butler.put(metric, "metric3", dataId3) 

906 

907 def testImportExport(self): 

908 # Run put/get tests just to create and populate a repo. 

909 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

910 self.runImportExportTest(storageClass) 

911 

912 @unittest.expectedFailure 

913 def testImportExportVirtualComposite(self): 

914 # Run put/get tests just to create and populate a repo. 

915 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

916 self.runImportExportTest(storageClass) 

917 

918 def runImportExportTest(self, storageClass): 

919 exportButler = self.runPutGetTest(storageClass, "test_metric") 

920 # Test that the repo actually has at least one dataset. 

921 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

922 self.assertGreater(len(datasets), 0) 

923 # Export those datasets. We used TemporaryDirectory because there 

924 # doesn't seem to be a way to get the filename (as opposed to the file 

925 # object) from any of tempfile's temporary-file context managers. 

926 with tempfile.TemporaryDirectory() as exportDir: 

927 # TODO: When PosixDatastore supports transfer-on-exist, add tests 

928 # for that. 

929 exportFile = os.path.join(exportDir, "exports.yaml") 

930 with exportButler.export(filename=exportFile) as export: 

931 export.saveDatasets(datasets) 

932 self.assertTrue(os.path.exists(exportFile)) 

933 with tempfile.TemporaryDirectory() as importDir: 

934 Butler.makeRepo(importDir, config=Config(self.configFile)) 

935 # Calling script.butlerImport tests the implementation of the 

936 # butler command line interface "import" subcommand. Functions 

937 # in the script folder are generally considered protected and 

938 # should not be used as public api. 

939 with open(exportFile, "r") as f: 

940 script.butlerImport(importDir, output_run="ingest/run", export_file=f, 

941 directory=exportButler.datastore.root, transfer="symlink") 

942 importButler = Butler(importDir, run="ingest/run") 

943 for ref in datasets: 

944 with self.subTest(ref=ref): 

945 # Test for existence by passing in the DatasetType and 

946 # data ID separately, to avoid lookup by dataset_id. 

947 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

948 

949 

950class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase): 

951 """PosixDatastore specialization of a butler""" 

952 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

953 fullConfigKey = ".datastore.formatters" 

954 validationCanFail = True 

955 datastoreStr = ["/tmp"] 

956 datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"] 

957 registryStr = "/gen3.sqlite3" 

958 

959 

960class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

961 """InMemoryDatastore specialization of a butler""" 

962 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

963 fullConfigKey = None 

964 useTempRoot = False 

965 validationCanFail = False 

966 datastoreStr = ["datastore='InMemory"] 

967 datastoreName = ["InMemoryDatastore@"] 

968 registryStr = ":memory:" 

969 

970 def testIngest(self): 

971 pass 

972 

973 

974class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

975 """PosixDatastore specialization""" 

976 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

977 fullConfigKey = ".datastore.datastores.1.formatters" 

978 validationCanFail = True 

979 datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"] 

980 datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1", 

981 "SecondDatastore"] 

982 registryStr = "/gen3.sqlite3" 

983 

984 

985class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

986 """Test that a yaml file in one location can refer to a root in another.""" 

987 

988 datastoreStr = ["dir1"] 

989 # Disable the makeRepo test since we are deliberately not using 

990 # butler.yaml as the config name. 

991 fullConfigKey = None 

992 

993 def setUp(self): 

994 self.root = tempfile.mkdtemp(dir=TESTDIR) 

995 

996 # Make a new repository in one place 

997 self.dir1 = os.path.join(self.root, "dir1") 

998 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

999 

1000 # Move the yaml file to a different place and add a "root" 

1001 self.dir2 = os.path.join(self.root, "dir2") 

1002 safeMakeDir(self.dir2) 

1003 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1004 config = Config(configFile1) 

1005 config["root"] = self.dir1 

1006 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1007 config.dumpToFile(configFile2) 

1008 os.remove(configFile1) 

1009 self.tmpConfigFile = configFile2 

1010 

1011 def testFileLocations(self): 

1012 self.assertNotEqual(self.dir1, self.dir2) 

1013 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1014 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1015 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1016 

1017 

1018class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1019 """Test that a config file created by makeRepo outside of repo works.""" 

1020 

1021 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1022 

1023 def setUp(self): 

1024 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1025 self.root2 = tempfile.mkdtemp(dir=TESTDIR) 

1026 

1027 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1028 Butler.makeRepo(self.root, config=Config(self.configFile), 

1029 outfile=self.tmpConfigFile) 

1030 

1031 def tearDown(self): 

1032 if os.path.exists(self.root2): 

1033 shutil.rmtree(self.root2, ignore_errors=True) 

1034 super().tearDown() 

1035 

1036 def testConfigExistence(self): 

1037 c = Config(self.tmpConfigFile) 

1038 uri_config = ButlerURI(c["root"]) 

1039 uri_expected = ButlerURI(self.root, forceDirectory=True) 

1040 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1041 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1042 

1043 def testPutGet(self): 

1044 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1045 self.runPutGetTest(storageClass, "test_metric") 

1046 

1047 

1048class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1049 """Test that a config file created by makeRepo outside of repo works.""" 

1050 

1051 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1052 

1053 def setUp(self): 

1054 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1055 self.root2 = tempfile.mkdtemp(dir=TESTDIR) 

1056 

1057 self.tmpConfigFile = self.root2 

1058 Butler.makeRepo(self.root, config=Config(self.configFile), 

1059 outfile=self.tmpConfigFile) 

1060 

1061 def testConfigExistence(self): 

1062 # Append the yaml file else Config constructor does not know the file 

1063 # type. 

1064 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1065 super().testConfigExistence() 

1066 

1067 

1068class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1069 """Test that a config file created by makeRepo outside of repo works.""" 

1070 

1071 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1072 

1073 def setUp(self): 

1074 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1075 self.root2 = tempfile.mkdtemp(dir=TESTDIR) 

1076 

1077 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl() 

1078 Butler.makeRepo(self.root, config=Config(self.configFile), 

1079 outfile=self.tmpConfigFile) 

1080 

1081 

1082@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1083@mock_s3 

1084class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase): 

1085 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1086 a local in-memory SqlRegistry. 

1087 """ 

1088 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1089 fullConfigKey = None 

1090 validationCanFail = True 

1091 

1092 bucketName = "anybucketname" 

1093 """Name of the Bucket that will be used in the tests. The name is read from 

1094 the config file used with the tests during set-up. 

1095 """ 

1096 

1097 root = "butlerRoot/" 

1098 """Root repository directory expected to be used in case useTempRoot=False. 

1099 Otherwise the root is set to a 20 characters long randomly generated string 

1100 during set-up. 

1101 """ 

1102 

1103 datastoreStr = [f"datastore={root}"] 

1104 """Contains all expected root locations in a format expected to be 

1105 returned by Butler stringification. 

1106 """ 

1107 

1108 datastoreName = ["S3Datastore@s3://{bucketName}/{root}"] 

1109 """The expected format of the S3Datastore string.""" 

1110 

1111 registryStr = ":memory:" 

1112 """Expected format of the Registry string.""" 

1113 

1114 def genRoot(self): 

1115 """Returns a random string of len 20 to serve as a root 

1116 name for the temporary bucket repo. 

1117 

1118 This is equivalent to tempfile.mkdtemp as this is what self.root 

1119 becomes when useTempRoot is True. 

1120 """ 

1121 rndstr = "".join( 

1122 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1123 ) 

1124 return rndstr + "/" 

1125 

1126 def setUp(self): 

1127 config = Config(self.configFile) 

1128 uri = ButlerURI(config[".datastore.datastore.root"]) 

1129 self.bucketName = uri.netloc 

1130 

1131 # set up some fake credentials if they do not exist 

1132 self.usingDummyCredentials = setAwsEnvCredentials() 

1133 

1134 if self.useTempRoot: 

1135 self.root = self.genRoot() 

1136 rooturi = f"s3://{self.bucketName}/{self.root}" 

1137 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1138 

1139 # MOTO needs to know that we expect Bucket bucketname to exist 

1140 # (this used to be the class attribute bucketName) 

1141 s3 = boto3.resource("s3") 

1142 s3.create_bucket(Bucket=self.bucketName) 

1143 

1144 self.datastoreStr = f"datastore={self.root}" 

1145 self.datastoreName = [f"S3Datastore@{rooturi}"] 

1146 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1147 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1148 

1149 def tearDown(self): 

1150 s3 = boto3.resource("s3") 

1151 bucket = s3.Bucket(self.bucketName) 

1152 try: 

1153 bucket.objects.all().delete() 

1154 except botocore.exceptions.ClientError as e: 

1155 if e.response["Error"]["Code"] == "404": 

1156 # the key was not reachable - pass 

1157 pass 

1158 else: 

1159 raise 

1160 

1161 bucket = s3.Bucket(self.bucketName) 

1162 bucket.delete() 

1163 

1164 # unset any potentially set dummy credentials 

1165 if self.usingDummyCredentials: 

1166 unsetAwsEnvCredentials() 

1167 

1168 def checkFileExists(self, root, relpath): 

1169 """Checks if file exists at a given path (relative to root). 

1170 

1171 Test testPutTemplates verifies actual physical existance of the files 

1172 in the requested location. For S3Datastore this test is equivalent to 

1173 `lsst.daf.butler.core.s3utils.s3checkFileExists` call. 

1174 """ 

1175 uri = ButlerURI(root) 

1176 uri.updateFile(relpath) 

1177 return s3CheckFileExists(uri)[0] 

1178 

1179 @unittest.expectedFailure 

1180 def testImportExport(self): 

1181 super().testImportExport() 

1182 

1183 

1184if __name__ == "__main__": 1184 ↛ 1185line 1184 didn't jump to line 1185, because the condition on line 1184 was never true

1185 unittest.main()