# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""
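
# The tests below exercise the high-level Butler API end to end. A minimal
# put/get round trip of the kind exercised throughout this file looks
# roughly like the following sketch (hypothetical repo path and data ID):
#
#     butler = Butler("repo/butler.yaml", run="ingest")
#     ref = butler.put(metric, "test_metric",
#                      {"instrument": "DummyCamComp", "visit": 423})
#     assert butler.getDirect(ref) == metric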

import os
import posixpath
import unittest
import unittest.mock  # not implicitly available via "import unittest" on every Python version
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))
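

# Helper producing the MetricsExample payload (its summary, output, and data
# components) that the put/get tests below repeatedly store and retrieve.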

def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets are added to the "ingest" run, which is also the
        # collection searched by default when looking them up again.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run is now
            # empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})
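
        # A butler constructed from an existing butler shares that butler's
        # registry and datastore but can override the collection search path.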

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Add a new dataset type and delete it
        tmpName = "prune_collections_disposable"
        tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
                                             butler.registry)
        tmpFromRegistry = butler.registry.getDatasetType(tmpName)
        self.assertEqual(tmpDatasetType, tmpFromRegistry)
        butler.registry.removeDatasetType(tmpName)
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(tmpName)
        # Removing a second time is fine
        butler.registry.removeDatasetType(tmpName)

        # Component removal is not allowed
        with self.assertRaises(ValueError):
            butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))

        # Try and fail to delete a datasetType that is associated with data
        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying for dataset types can
        # still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root.
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults, while the standalone "full" config does.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes; the first two will use
        # the default file template, while "metric3" is configured with a
        # template that does not produce unique filenames (exercised at the
        # end of this test).
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Test exporting to a temp directory and importing back into a new
        temp-directory repo. Does not assume a posix datastore."""
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
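
    # Attributes consumed by the shared tests: fullConfigKey is checked in
    # testMakeRepo, validationCanFail gates testGetDatasetTypes, and
    # datastoreStr/datastoreName/registryStr are matched against the butler's
    # stringification in testStringification.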

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"
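
    # In-memory datastores cannot ingest files, so disable the inherited
    # ingest test.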

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
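

# moto's mock_s3 decorator below patches boto3/botocore so the S3 tests run
# against a local in-memory bucket rather than real AWS services; the no-op
# fallback defined at the top of this file keeps this module importable when
# moto is missing.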

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port. This server only runs while this test class is
        instantiated, and then shuts down. It must be started in a
        separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : callable
            Zero-argument callable polled once per second; the server shuts
            down gracefully when it returns `True`.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when it returns True.
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
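        # The socket is closed before the webdav server binds this port, so
        # another process could in principle claim it first; that race is
        # acceptable for a local test fixture.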

        free_socket.close()
        return port


if __name__ == "__main__":
    unittest.main()