Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import time 

34import socket 

35 

try:
    # Optional test dependencies for the S3-backed datastore tests.
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # S3 tests elsewhere in this file are expected to check
    # ``boto3 is None`` and skip themselves when these packages are absent.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

47 

try:
    # Optional test dependencies for the WebDAV-backed datastore tests.
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    # WebDAV tests are expected to check ``WsgiDAVApp is None`` and skip
    # themselves when these packages are absent.
    WsgiDAVApp = None

53 

54import astropy.time 

55from threading import Thread 

56from tempfile import gettempdir 

57from lsst.utils import doImport 

58from lsst.daf.butler.core.utils import safeMakeDir 

59from lsst.daf.butler import Butler, Config, ButlerConfig 

60from lsst.daf.butler import StorageClassFactory 

61from lsst.daf.butler import DatasetType, DatasetRef 

62from lsst.daf.butler import FileTemplateValidationError, ValidationError 

63from lsst.daf.butler import FileDataset 

64from lsst.daf.butler import CollectionSearch, CollectionType 

65from lsst.daf.butler import ButlerURI 

66from lsst.daf.butler import script 

67from lsst.daf.butler.registry import MissingCollectionError 

68from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

69from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials, 

70 unsetAwsEnvCredentials) 

71from lsst.daf.butler.core.webdavutils import isWebdavEndpoint 

72 

73from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

74 

# Absolute path of the directory holding this test module; used to locate
# test data and configuration files relative to the source tree.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

76 

77 

def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values.

    The three constructor arguments are a summary mapping, a nested output
    mapping, and a list of data values.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

84 

85 

class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

91 

92 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Extra search paths must override default values and be logged."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without extra search paths the override directory is never read.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCm:
            defaultConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logCm.output))

        # With the override directory supplied, its use shows in the log.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCm:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logCm.output))

        # The overridden value must differ from the default one.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")

111 

112 

class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Repository root created per test; ``tearDown`` removes it when set.
    # Concrete subclasses are expected to provide ``configFile`` and
    # ``tmpConfigFile`` -- TODO confirm against subclasses.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Build the storage class factory once per test class from the
        # subclass-provided ``configFile``.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        # Assert that each named component retrieved through butler.get
        # matches the corresponding attribute of ``reference``.
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository root; errors are ignored so a
        # cleanup failure cannot mask the real test outcome.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for the given storage class
        and dataset type name, returning the Butler that was used.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # There will not be a collection yet
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        # TAI timestamps give the visit a definite observation interval.
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            # Derived "counter" component, when supported by the storage
            # class, reports the length of the data component.
            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check put/get when collections are supplied per-call rather than
        at Butler construction time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

380 

381 

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # When True, setUp creates a fresh temporary repository root per test
    # (removed by tearDown); subclasses may set False to use ``configFile``
    # directly.
    useTempRoot = True

386 

387 def setUp(self): 

388 """Create a new butler root for each test.""" 

389 if self.useTempRoot: 

390 self.root = tempfile.mkdtemp(dir=TESTDIR) 

391 Butler.makeRepo(self.root, config=Config(self.configFile)) 

392 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

393 else: 

394 self.root = None 

395 self.tmpConfigFile = self.configFile 

396 

397 def testConstructor(self): 

398 """Independent test of constructor. 

399 """ 

400 butler = Butler(self.tmpConfigFile, run="ingest") 

401 self.assertIsInstance(butler, Butler) 

402 

403 collections = set(butler.registry.queryCollections()) 

404 self.assertEqual(collections, {"ingest"}) 

405 

406 butler2 = Butler(butler=butler, collections=["other"]) 

407 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"])) 

408 self.assertIsNone(butler2.run) 

409 self.assertIs(butler.registry, butler2.registry) 

410 self.assertIs(butler.datastore, butler2.datastore) 

411 

412 def testBasicPutGet(self): 

413 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

414 self.runPutGetTest(storageClass, "test_metric") 

415 

    def testCompositePutGetConcrete(self):
        """Check a composite storage class configured *without* disassembly:
        the dataset must be stored as a single artifact.
        """

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset: visit 424 was never stored, so predict=True must
        # return a URI flagged with the "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

437 

    def testCompositePutGetVirtual(self):
        """Check a composite storage class configured for disassembly: the
        dataset must be stored as per-component artifacts, except for
        in-memory (ephemeral) datastores, which never disassemble.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no primary URI, one URI per component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset: visit 424 was never stored, so predict=True
        # must flag every returned URI with the "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

478 

    def testIngest(self):
        """Ingest external files -- including one file containing multiple
        datasets -- and verify retrieval, URIs, and partial removal.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        # One file per detector: each FileDataset carries a single ref.
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: distinct files were ingested, so URIs must differ.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            # NOTE(review): detector_name is unused in this loop.
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from one file, so URIs must match.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

571 

572 def testPruneCollections(self): 

573 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

574 butler = Butler(self.tmpConfigFile, writeable=True) 

575 # Load registry data with dimensions to hang datasets off of. 

576 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

577 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

578 # Add some RUN-type collections. 

579 run1 = "run1" 

580 butler.registry.registerRun(run1) 

581 run2 = "run2" 

582 butler.registry.registerRun(run2) 

583 # put some datasets. ref1 and ref2 have the same data ID, and are in 

584 # different runs. ref3 has a different data ID. 

585 metric = makeExampleMetrics() 

586 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

587 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

588 butler.registry) 

589 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

590 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

591 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

592 # Try to delete a RUN collection without purge, or with purge and not 

593 # unstore. 

594 with self.assertRaises(TypeError): 

595 butler.pruneCollection(run1) 

596 with self.assertRaises(TypeError): 

597 butler.pruneCollection(run2, purge=True) 

598 # Add a TAGGED collection and associate ref3 only into it. 

599 tag1 = "tag1" 

600 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

601 butler.registry.associate(tag1, [ref3]) 

602 # Add a CHAINED collection that searches run1 and then run2. It 

603 # logically contains only ref1, because ref2 is shadowed due to them 

604 # having the same data ID and dataset type. 

605 chain1 = "chain1" 

606 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

607 butler.registry.setCollectionChain(chain1, [run1, run2]) 

608 # Try to delete RUN collections, which should fail with complete 

609 # rollback because they're still referenced by the CHAINED 

610 # collection. 

611 with self.assertRaises(Exception): 

612 butler.pruneCollection(run1, pruge=True, unstore=True) 

613 with self.assertRaises(Exception): 

614 butler.pruneCollection(run2, pruge=True, unstore=True) 

615 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

616 [ref1, ref2, ref3]) 

617 self.assertTrue(butler.datastore.exists(ref1)) 

618 self.assertTrue(butler.datastore.exists(ref2)) 

619 self.assertTrue(butler.datastore.exists(ref3)) 

620 # Try to delete CHAINED and TAGGED collections with purge; should not 

621 # work. 

622 with self.assertRaises(TypeError): 

623 butler.pruneCollection(tag1, purge=True, unstore=True) 

624 with self.assertRaises(TypeError): 

625 butler.pruneCollection(chain1, purge=True, unstore=True) 

626 # Remove the tagged collection with unstore=False. This should not 

627 # affect the datasets. 

628 butler.pruneCollection(tag1) 

629 with self.assertRaises(MissingCollectionError): 

630 butler.registry.getCollectionType(tag1) 

631 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

632 [ref1, ref2, ref3]) 

633 self.assertTrue(butler.datastore.exists(ref1)) 

634 self.assertTrue(butler.datastore.exists(ref2)) 

635 self.assertTrue(butler.datastore.exists(ref3)) 

636 # Add the tagged collection back in, and remove it with unstore=True. 

637 # This should remove ref3 only from the datastore. 

638 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

639 butler.registry.associate(tag1, [ref3]) 

640 butler.pruneCollection(tag1, unstore=True) 

641 with self.assertRaises(MissingCollectionError): 

642 butler.registry.getCollectionType(tag1) 

643 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

644 [ref1, ref2, ref3]) 

645 self.assertTrue(butler.datastore.exists(ref1)) 

646 self.assertTrue(butler.datastore.exists(ref2)) 

647 self.assertFalse(butler.datastore.exists(ref3)) 

648 # Delete the chain with unstore=False. The datasets should not be 

649 # affected at all. 

650 butler.pruneCollection(chain1) 

651 with self.assertRaises(MissingCollectionError): 

652 butler.registry.getCollectionType(chain1) 

653 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

654 [ref1, ref2, ref3]) 

655 self.assertTrue(butler.datastore.exists(ref1)) 

656 self.assertTrue(butler.datastore.exists(ref2)) 

657 self.assertFalse(butler.datastore.exists(ref3)) 

658 # Redefine and then delete the chain with unstore=True. Only ref1 

659 # should be unstored (ref3 has already been unstored, but otherwise 

660 # would be now). 

661 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

662 butler.registry.setCollectionChain(chain1, [run1, run2]) 

663 butler.pruneCollection(chain1, unstore=True) 

664 with self.assertRaises(MissingCollectionError): 

665 butler.registry.getCollectionType(chain1) 

666 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

667 [ref1, ref2, ref3]) 

668 self.assertFalse(butler.datastore.exists(ref1)) 

669 self.assertTrue(butler.datastore.exists(ref2)) 

670 self.assertFalse(butler.datastore.exists(ref3)) 

671 # Remove run1. This removes ref1 and ref3 from the registry (they're 

672 # already gone from the datastore, which is fine). 

673 butler.pruneCollection(run1, purge=True, unstore=True) 

674 with self.assertRaises(MissingCollectionError): 

675 butler.registry.getCollectionType(run1) 

676 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

677 [ref2]) 

678 self.assertTrue(butler.datastore.exists(ref2)) 

679 # Remove run2. This removes ref2 from the registry and the datastore. 

680 butler.pruneCollection(run2, purge=True, unstore=True) 

681 with self.assertRaises(MissingCollectionError): 

682 butler.registry.getCollectionType(run2) 

683 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

684 []) 

685 

686 def testPickle(self): 

687 """Test pickle support. 

688 """ 

689 butler = Butler(self.tmpConfigFile, run="ingest") 

690 butlerOut = pickle.loads(pickle.dumps(butler)) 

691 self.assertIsInstance(butlerOut, Butler) 

692 self.assertEqual(butlerOut._config, butler._config) 

693 self.assertEqual(butlerOut.collections, butler.collections) 

694 self.assertEqual(butlerOut.run, butler.run) 

695 

    def testGetDatasetTypes(self):
        """Register several dataset types, query them back (including
        components), and validate the butler configuration against them.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # ``validationCanFail`` is presumably provided by concrete
        # subclasses -- TODO confirm against subclasses outside this view.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

740 

    def testTransaction(self):
        """Test that raising inside a butler transaction rolls back both the
        registry inserts (dimensions, dataset entries) and the datastore
        writes performed within it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                # Deliberately abort: everything above must be undone.
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

783 

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        # Second makeRepo with standalone=True writes a fully-expanded config;
        # overwrite=True is required because the repo already exists.
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        # Overwriting an existing repo without overwrite=True must fail.
        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

830 

831 def testStringification(self): 

832 butler = Butler(self.tmpConfigFile, run="ingest") 

833 butlerStr = str(butler) 

834 

835 if self.datastoreStr is not None: 

836 for testStr in self.datastoreStr: 

837 self.assertIn(testStr, butlerStr) 

838 if self.registryStr is not None: 

839 self.assertIn(self.registryStr, butlerStr) 

840 

841 datastoreName = butler.datastore.name 

842 if self.datastoreName is not None: 

843 for testStr in self.datastoreName: 

844 self.assertIn(testStr, datastoreName) 

845 

846 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        """Test that datasets are written to the paths their file templates
        dictate, and that a non-unique template is rejected.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        """Round-trip a repo through export and import using a simple
        storage class.
        """
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        """Round-trip a repo through export/import with a composite storage
        class; currently expected to fail.
        """
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # NOTE(review): leftover debug output — consider removing.
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

960 

961 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        # Paths of each dataset relative to the datastore root.
        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        # Each transfer mode must reproduce the same relative layout in the
        # export directory.
        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

996 

997 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so the makeRepo test is skipped.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Ingest of external files does not apply to an in-memory datastore;
        # override the inherited test with a no-op to skip it.
        pass

1010 

1011 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler (an in-memory datastore
    chained with posix datastores, per the expected strings below)."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Index 1 selects the first posix child datastore's formatters section.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

1021 

1022 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        # Per-test root holding both the repo directory and the relocated
        # config directory.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        # Remove the original so the butler can only be reached via dir2.
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Sanity-check the on-disk layout produced by setUp: config lives
        only in dir2, the registry database only in dir1.
        """
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

1054 

1055 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # root holds the repo itself; root2 holds only the external config.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the external config directory; the repo root is cleaned up
        # by the parent class tearDown.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """The external config must record the repo root as an absolute URI
        pointing back at self.root.
        """
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        """A butler built from the external config must support put/get."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

1084 

1085 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when ``outfile`` is given as a directory rather than a file path.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Pass a directory as outfile; makeRepo writes butler.yaml inside it.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

1104 

1105 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when ``outfile`` is given as a URI rather than a plain path.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile location as a URI (file:// scheme).
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

1118 

1119 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    # Fix: the ``f`` prefix was missing, so the class-level default contained
    # the literal placeholders "{bucketName}"/"{root}" (latent, since setUp
    # overwrites this attribute; datastoreStr above shows the intended style).
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        # The bucket name comes from the datastore root URI in the config.
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            # A bucket must be emptied before it can be deleted.
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

1205 

1206 

@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                       "WEBDAV_BEARER_TOKEN": "XXXXXX"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fix: the ``f`` prefix was missing, so the class-level default contained
    # the literal placeholders "{serverName}"/"{root}" (latent, since setUp
    # overwrites this attribute; datastoreStr above shows the intended style).
    datastoreName = [f"WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run"""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True
    """

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.
        # _serveWebdav is written as an instance-style method but is invoked
        # here as a plain function, so ``cls`` is passed explicitly as its
        # first argument.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Starts a local webdav-compatible HTTP server,
        Listening on http://localhost:8080
        This server only runs when this test class is instantiated,
        and then shuts down. Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen
        stopWebdavServer : callable
            Zero-argument callable polled once per second; the server shuts
            down when it returns `True`.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    # Fix: declared as @staticmethod so the existing class-level call
    # ``cls._getfreeport()`` keeps working and access through an instance no
    # longer raises TypeError (the original was a bare no-arg function in the
    # class body, callable only via the class).
    @staticmethod
    def _getfreeport():
        """
        Determines a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port

1372 

1373 

# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()