Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import time 

34import socket 

35 

try:
    # Optional AWS stack; NOTE(review): presumably required by S3-datastore
    # tests elsewhere in this file — not visible in this chunk.
    # botocore is imported only to confirm the full boto stack is importable.
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # boto3 set to None doubles as the "S3 support unavailable" flag.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Returns the decorated class unchanged so test classes can still be
        defined when moto is absent.
        """
        return cls

47 

try:
    # Optional WebDAV server stack; NOTE(review): presumably used by
    # WebDAV-datastore tests later in the file — confirm against full file.
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    # WsgiDAVApp set to None doubles as the "WebDAV unavailable" flag.
    WsgiDAVApp = None

53 

54import astropy.time 

55from threading import Thread 

56from tempfile import gettempdir 

57from lsst.utils import doImport 

58from lsst.daf.butler.core.utils import safeMakeDir 

59from lsst.daf.butler import Butler, Config, ButlerConfig 

60from lsst.daf.butler import StorageClassFactory 

61from lsst.daf.butler import DatasetType, DatasetRef 

62from lsst.daf.butler import FileTemplateValidationError, ValidationError 

63from lsst.daf.butler import FileDataset 

64from lsst.daf.butler import CollectionSearch, CollectionType 

65from lsst.daf.butler import ButlerURI 

66from lsst.daf.butler import script 

67from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError 

68from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

69from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials, 

70 unsetAwsEnvCredentials) 

71from lsst.daf.butler.core.webdavutils import isWebdavEndpoint 

72 

73from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

74 

75TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

76 

77 

def makeExampleMetrics():
    """Return a MetricsExample preloaded with fixed example values."""
    scalars = {"AM1": 5.2, "AM2": 30.6}
    structured = {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}
    sequence = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(scalars, structured, sequence)

84 

85 

class TransactionTestError(Exception):
    """Dedicated exception for transaction tests.

    A distinct type prevents the misdiagnosis that might otherwise occur
    when a standard exception is used.
    """

91 

92 

93class ButlerConfigTests(unittest.TestCase): 

94 """Simple tests for ButlerConfig that are not tested in other test cases. 

95 """ 

96 

97 def testSearchPath(self): 

98 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

99 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

100 config1 = ButlerConfig(configFile) 

101 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

102 

103 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

104 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

105 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

106 self.assertIn("testConfigs", "\n".join(cm.output)) 

107 

108 key = ("datastore", "records", "table") 

109 self.assertNotEqual(config1[key], config2[key]) 

110 self.assertEqual(config2[key], "override_record") 

111 

112 

class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to be run against
    different butler configurations by concrete subclasses (which must
    supply ``configFile`` and ``tmpConfigFile``)."""

    # Repository root on disk; subclasses set this, tearDown removes it.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Build a shared StorageClassFactory from the subclass-provided
        # configuration file.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through ``butler.get``
        equals the matching attribute of the in-memory ``reference`` object.
        """
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository tree, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for the given storage class
        and dataset type name, returning the Butler used so callers can
        make further assertions.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the Butler should have registered both the run and
        # the tagged collection (asserted below).
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                # A derived "counter" component should report the length of
                # the data component, honoring any slice parameter.
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check that collections can be supplied per call instead of at
        Butler construction time."""
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

375 

376 

377class ButlerTests(ButlerPutGetTests): 

378 """Tests for Butler. 

379 """ 

380 useTempRoot = True 

381 

382 def setUp(self): 

383 """Create a new butler root for each test.""" 

384 if self.useTempRoot: 

385 self.root = tempfile.mkdtemp(dir=TESTDIR) 

386 Butler.makeRepo(self.root, config=Config(self.configFile)) 

387 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

388 else: 

389 self.root = None 

390 self.tmpConfigFile = self.configFile 

391 

392 def testConstructor(self): 

393 """Independent test of constructor. 

394 """ 

395 butler = Butler(self.tmpConfigFile, run="ingest") 

396 self.assertIsInstance(butler, Butler) 

397 

398 collections = set(butler.registry.queryCollections()) 

399 self.assertEqual(collections, {"ingest"}) 

400 

401 butler2 = Butler(butler=butler, collections=["other"]) 

402 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"])) 

403 self.assertIsNone(butler2.run) 

404 self.assertIs(butler.registry, butler2.registry) 

405 self.assertIs(butler.datastore, butler2.datastore) 

406 

407 def testBasicPutGet(self): 

408 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

409 self.runPutGetTest(storageClass, "test_metric") 

410 

    def testCompositePutGetConcrete(self):
        """Round-trip a composite storage class configured to *not* be
        disassembled, then verify its stored and predicted URIs."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled: a single URI and no component URIs.
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset: not yet stored, so the URI carries a
        # "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

432 

    def testCompositePutGetVirtual(self):
        """Round-trip a composite storage class that is disassembled into
        per-component files (except in ephemeral datastores)."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no composite URI, one URI per component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset: not stored yet, so URIs carry a "predicted"
        # fragment in both branches.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

473 

474 def testIngest(self): 

475 butler = Butler(self.tmpConfigFile, run="ingest") 

476 

477 # Create and register a DatasetType 

478 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"]) 

479 

480 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

481 datasetTypeName = "metric" 

482 

483 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

484 

485 # Add needed Dimensions 

486 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

487 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

488 "name": "d-r", 

489 "band": "R"}) 

490 for detector in (1, 2): 

491 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector, 

492 "full_name": f"detector{detector}"}) 

493 

494 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

495 "name": "fourtwentythree", "physical_filter": "d-r"}, 

496 {"instrument": "DummyCamComp", "id": 424, 

497 "name": "fourtwentyfour", "physical_filter": "d-r"}) 

498 

499 formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter") 

500 dataRoot = os.path.join(TESTDIR, "data", "basic") 

501 datasets = [] 

502 for detector in (1, 2): 

503 detector_name = f"detector_{detector}" 

504 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

505 dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

506 # Create a DatasetRef for ingest 

507 refIn = DatasetRef(datasetType, dataId, id=None) 

508 

509 datasets.append(FileDataset(path=metricFile, 

510 refs=[refIn], 

511 formatter=formatter)) 

512 

513 butler.ingest(*datasets, transfer="copy") 

514 

515 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

516 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

517 

518 metrics1 = butler.get(datasetTypeName, dataId1) 

519 metrics2 = butler.get(datasetTypeName, dataId2) 

520 self.assertNotEqual(metrics1, metrics2) 

521 

522 # Compare URIs 

523 uri1 = butler.getURI(datasetTypeName, dataId1) 

524 uri2 = butler.getURI(datasetTypeName, dataId2) 

525 self.assertNotEqual(uri1, uri2) 

526 

527 # Now do a multi-dataset but single file ingest 

528 metricFile = os.path.join(dataRoot, "detectors.yaml") 

529 refs = [] 

530 for detector in (1, 2): 

531 detector_name = f"detector_{detector}" 

532 dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

533 # Create a DatasetRef for ingest 

534 refs.append(DatasetRef(datasetType, dataId, id=None)) 

535 

536 datasets = [] 

537 datasets.append(FileDataset(path=metricFile, 

538 refs=refs, 

539 formatter=MultiDetectorFormatter)) 

540 

541 butler.ingest(*datasets, transfer="copy") 

542 

543 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

544 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

545 

546 multi1 = butler.get(datasetTypeName, dataId1) 

547 multi2 = butler.get(datasetTypeName, dataId2) 

548 

549 self.assertEqual(multi1, metrics1) 

550 self.assertEqual(multi2, metrics2) 

551 

552 # Compare URIs 

553 uri1 = butler.getURI(datasetTypeName, dataId1) 

554 uri2 = butler.getURI(datasetTypeName, dataId2) 

555 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

556 

557 # Test that removing one does not break the second 

558 # This line will issue a warning log message for a ChainedDatastore 

559 # that uses an InMemoryDatastore since in-memory can not ingest 

560 # files. 

561 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

562 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

563 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

564 multi2b = butler.get(datasetTypeName, dataId2) 

565 self.assertEqual(multi2, multi2b) 

566 

567 def testPruneCollections(self): 

568 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

569 butler = Butler(self.tmpConfigFile, writeable=True) 

570 # Load registry data with dimensions to hang datasets off of. 

571 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

572 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

573 # Add some RUN-type collections. 

574 run1 = "run1" 

575 butler.registry.registerRun(run1) 

576 run2 = "run2" 

577 butler.registry.registerRun(run2) 

578 # put some datasets. ref1 and ref2 have the same data ID, and are in 

579 # different runs. ref3 has a different data ID. 

580 metric = makeExampleMetrics() 

581 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

582 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

583 butler.registry) 

584 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

585 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

586 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

587 

588 # Add a new dataset type and delete it 

589 tmpName = "prune_collections_disposable" 

590 tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass, 

591 butler.registry) 

592 tmpFromRegistry = butler.registry.getDatasetType(tmpName) 

593 self.assertEqual(tmpDatasetType, tmpFromRegistry) 

594 butler.registry.removeDatasetType(tmpName) 

595 with self.assertRaises(KeyError): 

596 butler.registry.getDatasetType(tmpName) 

597 # Removing a second time is fine 

598 butler.registry.removeDatasetType(tmpName) 

599 

600 # Component removal is not allowed 

601 with self.assertRaises(ValueError): 

602 butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component")) 

603 

604 # Try and fail to delete a datasetType that is associated with data 

605 with self.assertRaises(OrphanedRecordError): 

606 butler.registry.removeDatasetType(datasetType.name) 

607 

608 # Try to delete a RUN collection without purge, or with purge and not 

609 # unstore. 

610 with self.assertRaises(TypeError): 

611 butler.pruneCollection(run1) 

612 with self.assertRaises(TypeError): 

613 butler.pruneCollection(run2, purge=True) 

614 # Add a TAGGED collection and associate ref3 only into it. 

615 tag1 = "tag1" 

616 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

617 butler.registry.associate(tag1, [ref3]) 

618 # Add a CHAINED collection that searches run1 and then run2. It 

619 # logically contains only ref1, because ref2 is shadowed due to them 

620 # having the same data ID and dataset type. 

621 chain1 = "chain1" 

622 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

623 butler.registry.setCollectionChain(chain1, [run1, run2]) 

624 # Try to delete RUN collections, which should fail with complete 

625 # rollback because they're still referenced by the CHAINED 

626 # collection. 

627 with self.assertRaises(Exception): 

628 butler.pruneCollection(run1, pruge=True, unstore=True) 

629 with self.assertRaises(Exception): 

630 butler.pruneCollection(run2, pruge=True, unstore=True) 

631 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

632 [ref1, ref2, ref3]) 

633 self.assertTrue(butler.datastore.exists(ref1)) 

634 self.assertTrue(butler.datastore.exists(ref2)) 

635 self.assertTrue(butler.datastore.exists(ref3)) 

636 # Try to delete CHAINED and TAGGED collections with purge; should not 

637 # work. 

638 with self.assertRaises(TypeError): 

639 butler.pruneCollection(tag1, purge=True, unstore=True) 

640 with self.assertRaises(TypeError): 

641 butler.pruneCollection(chain1, purge=True, unstore=True) 

642 # Remove the tagged collection with unstore=False. This should not 

643 # affect the datasets. 

644 butler.pruneCollection(tag1) 

645 with self.assertRaises(MissingCollectionError): 

646 butler.registry.getCollectionType(tag1) 

647 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

648 [ref1, ref2, ref3]) 

649 self.assertTrue(butler.datastore.exists(ref1)) 

650 self.assertTrue(butler.datastore.exists(ref2)) 

651 self.assertTrue(butler.datastore.exists(ref3)) 

652 # Add the tagged collection back in, and remove it with unstore=True. 

653 # This should remove ref3 only from the datastore. 

654 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

655 butler.registry.associate(tag1, [ref3]) 

656 butler.pruneCollection(tag1, unstore=True) 

657 with self.assertRaises(MissingCollectionError): 

658 butler.registry.getCollectionType(tag1) 

659 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

660 [ref1, ref2, ref3]) 

661 self.assertTrue(butler.datastore.exists(ref1)) 

662 self.assertTrue(butler.datastore.exists(ref2)) 

663 self.assertFalse(butler.datastore.exists(ref3)) 

664 # Delete the chain with unstore=False. The datasets should not be 

665 # affected at all. 

666 butler.pruneCollection(chain1) 

667 with self.assertRaises(MissingCollectionError): 

668 butler.registry.getCollectionType(chain1) 

669 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

670 [ref1, ref2, ref3]) 

671 self.assertTrue(butler.datastore.exists(ref1)) 

672 self.assertTrue(butler.datastore.exists(ref2)) 

673 self.assertFalse(butler.datastore.exists(ref3)) 

674 # Redefine and then delete the chain with unstore=True. Only ref1 

675 # should be unstored (ref3 has already been unstored, but otherwise 

676 # would be now). 

677 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

678 butler.registry.setCollectionChain(chain1, [run1, run2]) 

679 butler.pruneCollection(chain1, unstore=True) 

680 with self.assertRaises(MissingCollectionError): 

681 butler.registry.getCollectionType(chain1) 

682 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

683 [ref1, ref2, ref3]) 

684 self.assertFalse(butler.datastore.exists(ref1)) 

685 self.assertTrue(butler.datastore.exists(ref2)) 

686 self.assertFalse(butler.datastore.exists(ref3)) 

687 # Remove run1. This removes ref1 and ref3 from the registry (they're 

688 # already gone from the datastore, which is fine). 

689 butler.pruneCollection(run1, purge=True, unstore=True) 

690 with self.assertRaises(MissingCollectionError): 

691 butler.registry.getCollectionType(run1) 

692 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

693 [ref2]) 

694 self.assertTrue(butler.datastore.exists(ref2)) 

695 # Remove run2. This removes ref2 from the registry and the datastore. 

696 butler.pruneCollection(run2, purge=True, unstore=True) 

697 with self.assertRaises(MissingCollectionError): 

698 butler.registry.getCollectionType(run2) 

699 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

700 []) 

701 

702 # Now that the collections have been pruned we can remove the 

703 # dataset type 

704 butler.registry.removeDatasetType(datasetType.name) 

705 

706 def testPickle(self): 

707 """Test pickle support. 

708 """ 

709 butler = Butler(self.tmpConfigFile, run="ingest") 

710 butlerOut = pickle.loads(pickle.dumps(butler)) 

711 self.assertIsInstance(butlerOut, Butler) 

712 self.assertEqual(butlerOut._config, butler._config) 

713 self.assertEqual(butlerOut.collections, butler.collections) 

714 self.assertEqual(butlerOut.run, butler.run) 

715 

    def testGetDatasetTypes(self):
        """Test registration and querying of dataset types, including
        component expansion, and validate the butler configuration
        against the registered types.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            # Build the set of component dataset type names we expect the
            # query below to synthesize for this composite storage class.
            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

760 

    def testTransaction(self):
        """Test that a raised exception inside a butler transaction rolls
        back every registry insert and datastore write made inside it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                # Deliberately abort: everything above must be rolled back.
                raise TransactionTestError("This should roll back the entire transaction")
        # The dimension records inserted inside the transaction must be gone.
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore.
        # (``ref`` intentionally survives the rolled-back ``with`` block so
        # we can probe for the dataset it used to point at.)
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

803 

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        # First repo: config written without expanding defaults.
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        # Second repo: standalone=True writes a fully-expanded config.
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        # Without overwrite=True, re-making an existing repo must fail.
        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

850 

851 def testStringification(self): 

852 butler = Butler(self.tmpConfigFile, run="ingest") 

853 butlerStr = str(butler) 

854 

855 if self.datastoreStr is not None: 

856 for testStr in self.datastoreStr: 

857 self.assertIn(testStr, butlerStr) 

858 if self.registryStr is not None: 

859 self.assertIn(self.registryStr, butlerStr) 

860 

861 datastoreName = butler.datastore.name 

862 if self.datastoreName is not None: 

863 for testStr in self.datastoreName: 

864 self.assertIn(testStr, datastoreName) 

865 

866 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location.

        Returns
        -------
        exists : `bool`
            `True` if a file exists at ``relpath`` below ``root``.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        """Check that put() writes files at the locations implied by the
        datastore file templates, and that template validation works.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.

        Parameters
        ----------
        storageClass : `StorageClass`
            Storage class used by the put/get test populating the repo.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with tempfile.TemporaryDirectory() as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

991 

992 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Repository configuration and the fragments the stringification /
    # validation tests expect for a POSIX-backed datastore.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        # Each transfer mode must reproduce the in-store layout under the
        # export directory.
        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

1027 

1028 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root: disables the makeRepo test (see testMakeRepo).
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Override the inherited ingest test with a no-op; file ingest is
        # presumably not applicable to an in-memory datastore — confirm.
        pass

1041 

1042 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler: an in-memory datastore
    chained with two POSIX datastores (per the expected names below).
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

1052 

1053 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory and relocate its config (with an
        explicit "root" key) into a second directory.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Verify the config lives only in dir2 while the registry database
        remains in dir1 (the real repo root).
        """
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

1085 

1086 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Repo lives in root; the config is written to a *file path* in a
        # second, unrelated directory via makeRepo's outfile argument.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Clean up the out-of-repo config directory; the repo root itself
        # is removed by the parent class.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """The external config must record the true repo root as a URI."""
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        """A butler built from the external config must support put/get."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

1115 

1116 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when outfile is given as a *directory* rather than a file path.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Pass the bare directory: makeRepo should write butler.yaml inside.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

1135 

1136 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when outfile is given as a URI rather than a plain file path.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Convert the outfile path to a URI before handing it to makeRepo.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

1149 

1150 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # NOTE(review): deliberately NOT an f-string -- this documents the shape
    # only; the real value is assigned in setUp().
    datastoreName = ["S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 (plus a trailing "/") to serve
        as a root name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        # Take the bucket name from the datastore root URI in the config.
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # NOTE(review): the class attribute datastoreStr is a list but this
        # rebinds it to a plain string, so testStringification iterates its
        # characters. Passes, but looks unintended -- confirm.
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        # Empty the bucket before deleting it; a non-empty bucket cannot
        # be deleted.
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

1236 

1237 

@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                       "WEBDAV_BEARER_TOKEN": "XXXXXX"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run"""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True
    """

    def genRoot(self):
        """Returns a random string of len 20 (plus a trailing "/") to serve
        as a root name for the temporary webdav repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.
        # cls is passed explicitly as the first argument because
        # _serveWebdav is written instance-style but started from the class.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # NOTE(review): rebinds the list-typed class attribute to a plain
        # string; testStringification then iterates characters. Passes, but
        # looks unintended -- confirm before tightening.
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Starts a local webdav-compatible HTTP server,
        listening on the given port and serving the system temp directory.
        This server only runs when this test class is instantiated,
        and then shuts down. Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen
        stopWebdavServer : callable
            Zero-argument callable polled once per second; when it returns
            `True` the server is shut down gracefully.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    # Declared @staticmethod (fix): the original had neither self nor the
    # decorator, which only worked when accessed via the class object.
    @staticmethod
    def _getfreeport():
        """
        Determines a free port using sockets.

        Binding to port 0 asks the OS for an ephemeral free port; the
        socket is closed immediately so the port can be reused.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port

1403 

1404 

# Allow this test module to be run directly.
if __name__ == "__main__":
    unittest.main()