Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import time 

34import socket 

35 

36try: 

37 import boto3 

38 import botocore 

39 from moto import mock_s3 

40except ImportError: 

41 boto3 = None 

42 

43 def mock_s3(cls): 

44 """A no-op decorator in case moto mock_s3 can not be imported. 

45 """ 

46 return cls 

47 

48try: 

49 from cheroot import wsgi 

50 from wsgidav.wsgidav_app import WsgiDAVApp 

51except ImportError: 

52 WsgiDAVApp = None 

53 

54import astropy.time 

55from threading import Thread 

56from tempfile import gettempdir 

57from lsst.utils import doImport 

58from lsst.daf.butler.core.utils import safeMakeDir 

59from lsst.daf.butler import Butler, Config, ButlerConfig 

60from lsst.daf.butler import StorageClassFactory 

61from lsst.daf.butler import DatasetType, DatasetRef 

62from lsst.daf.butler import FileTemplateValidationError, ValidationError 

63from lsst.daf.butler import FileDataset 

64from lsst.daf.butler import CollectionSearch, CollectionType 

65from lsst.daf.butler import ButlerURI 

66from lsst.daf.butler import script 

67from lsst.daf.butler.registry import MissingCollectionError 

68from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

69from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials, 

70 unsetAwsEnvCredentials) 

71from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint 

72 

73from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

74from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir 

75 

76TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

77 

78 

79def makeExampleMetrics(): 

80 return MetricsExample({"AM1": 5.2, "AM2": 30.6}, 

81 {"a": [1, 2, 3], 

82 "b": {"blue": 5, "red": "green"}}, 

83 [563, 234, 456.7, 752, 8, 9, 27] 

84 ) 

85 

86 

87class TransactionTestError(Exception): 

88 """Specific error for testing transactions, to prevent misdiagnosing 

89 that might otherwise occur when a standard exception is used. 

90 """ 

91 pass 

92 

93 

94class ButlerConfigTests(unittest.TestCase): 

95 """Simple tests for ButlerConfig that are not tested in other test cases. 

96 """ 

97 

98 def testSearchPath(self): 

99 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

100 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

101 config1 = ButlerConfig(configFile) 

102 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

103 

104 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

105 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

106 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

107 self.assertIn("testConfigs", "\n".join(cm.output)) 

108 

109 key = ("datastore", "records", "table") 

110 self.assertNotEqual(config1[key], config2[key]) 

111 self.assertEqual(config2[key], "override_record") 

112 

113 

114class ButlerPutGetTests: 

115 """Helper method for running a suite of put/get tests from different 

116 butler configurations.""" 

117 

118 root = None 

119 

120 @staticmethod 

121 def addDatasetType(datasetTypeName, dimensions, storageClass, registry): 

122 """Create a DatasetType and register it 

123 """ 

124 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

125 registry.registerDatasetType(datasetType) 

126 return datasetType 

127 

128 @classmethod 

129 def setUpClass(cls): 

130 cls.storageClassFactory = StorageClassFactory() 

131 cls.storageClassFactory.addFromConfig(cls.configFile) 

132 

133 def assertGetComponents(self, butler, datasetRef, components, reference, collections=None): 

134 datasetType = datasetRef.datasetType 

135 dataId = datasetRef.dataId 

136 deferred = butler.getDirectDeferred(datasetRef) 

137 

138 for component in components: 

139 compTypeName = datasetType.componentTypeName(component) 

140 result = butler.get(compTypeName, dataId, collections=collections) 

141 self.assertEqual(result, getattr(reference, component)) 

142 result_deferred = deferred.get(component=component) 

143 self.assertEqual(result_deferred, result) 

144 

145 def tearDown(self): 

146 removeTestTempDir(self.root) 

147 

148 def runPutGetTest(self, storageClass, datasetTypeName): 

149 # New datasets will be added to run and tag, but we will only look in 

150 # tag when looking up datasets. 

151 run = "ingest" 

152 butler = Butler(self.tmpConfigFile, run=run) 

153 

154 collections = set(butler.registry.queryCollections()) 

155 self.assertEqual(collections, set([run])) 

156 

157 # Create and register a DatasetType 

158 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

159 

160 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

161 

162 # Add needed Dimensions 

163 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

164 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

165 "name": "d-r", 

166 "band": "R"}) 

167 butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp", 

168 "id": 1, 

169 "name": "default"}) 

170 visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai") 

171 visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai") 

172 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

173 "name": "fourtwentythree", "physical_filter": "d-r", 

174 "visit_system": 1, "datetime_begin": visit_start, 

175 "datetime_end": visit_end}) 

176 

177 # Add a second visit for some later tests 

178 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424, 

179 "name": "fourtwentyfour", "physical_filter": "d-r", 

180 "visit_system": 1}) 

181 

182 # Create and store a dataset 

183 metric = makeExampleMetrics() 

184 dataId = {"instrument": "DummyCamComp", "visit": 423} 

185 

186 # Create a DatasetRef for put 

187 refIn = DatasetRef(datasetType, dataId, id=None) 

188 

189 # Put with a preexisting id should fail 

190 with self.assertRaises(ValueError): 

191 butler.put(metric, DatasetRef(datasetType, dataId, id=100)) 

192 

193 # Put and remove the dataset once as a DatasetRef, once as a dataId, 

194 # and once with a DatasetType 

195 

196 # Keep track of any collections we add and do not clean up 

197 expected_collections = {run} 

198 

199 counter = 0 

200 for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)): 

201 # Since we are using subTest we can get cascading failures 

202 # here with the first attempt failing and the others failing 

203 # immediately because the dataset already exists. Work around 

204 # this by using a distinct run collection each time 

205 counter += 1 

206 this_run = f"put_run_{counter}" 

207 butler.registry.registerCollection(this_run, type=CollectionType.RUN) 

208 expected_collections.update({this_run}) 

209 

210 with self.subTest(args=args): 

211 ref = butler.put(metric, *args, run=this_run) 

212 self.assertIsInstance(ref, DatasetRef) 

213 

214 # Test getDirect 

215 metricOut = butler.getDirect(ref) 

216 self.assertEqual(metric, metricOut) 

217 # Test get 

218 metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run) 

219 self.assertEqual(metric, metricOut) 

220 # Test get with a datasetRef 

221 metricOut = butler.get(ref, collections=this_run) 

222 self.assertEqual(metric, metricOut) 

223 # Test getDeferred with dataId 

224 metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get() 

225 self.assertEqual(metric, metricOut) 

226 # Test getDeferred with a datasetRef 

227 metricOut = butler.getDeferred(ref, collections=this_run).get() 

228 self.assertEqual(metric, metricOut) 

229 # and deferred direct with ref 

230 metricOut = butler.getDirectDeferred(ref).get() 

231 self.assertEqual(metric, metricOut) 

232 

233 # Check we can get components 

234 if storageClass.isComposite(): 

235 self.assertGetComponents(butler, ref, 

236 ("summary", "data", "output"), metric, 

237 collections=this_run) 

238 

239 # Now remove the dataset completely. 

240 butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run) 

241 # Lookup with original args should still fail. 

242 with self.assertRaises(LookupError): 

243 butler.datasetExists(*args, collections=this_run) 

244 # getDirect() should still fail. 

245 with self.assertRaises(FileNotFoundError): 

246 butler.getDirect(ref) 

247 # Registry shouldn't be able to find it by dataset_id anymore. 

248 self.assertIsNone(butler.registry.getDataset(ref.id)) 

249 

250 # Do explicit registry removal since we know they are 

251 # empty 

252 butler.registry.removeCollection(this_run) 

253 expected_collections.remove(this_run) 

254 

255 # Put the dataset again, since the last thing we did was remove it 

256 # and we want to use the default collection. 

257 ref = butler.put(metric, refIn) 

258 

259 # Get with parameters 

260 stop = 4 

261 sliced = butler.get(ref, parameters={"slice": slice(stop)}) 

262 self.assertNotEqual(metric, sliced) 

263 self.assertEqual(metric.summary, sliced.summary) 

264 self.assertEqual(metric.output, sliced.output) 

265 self.assertEqual(metric.data[:stop], sliced.data) 

266 # getDeferred with parameters 

267 sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get() 

268 self.assertNotEqual(metric, sliced) 

269 self.assertEqual(metric.summary, sliced.summary) 

270 self.assertEqual(metric.output, sliced.output) 

271 self.assertEqual(metric.data[:stop], sliced.data) 

272 # getDeferred with deferred parameters 

273 sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)}) 

274 self.assertNotEqual(metric, sliced) 

275 self.assertEqual(metric.summary, sliced.summary) 

276 self.assertEqual(metric.output, sliced.output) 

277 self.assertEqual(metric.data[:stop], sliced.data) 

278 

279 if storageClass.isComposite(): 

280 # Check that components can be retrieved 

281 metricOut = butler.get(ref.datasetType.name, dataId) 

282 compNameS = ref.datasetType.componentTypeName("summary") 

283 compNameD = ref.datasetType.componentTypeName("data") 

284 summary = butler.get(compNameS, dataId) 

285 self.assertEqual(summary, metric.summary) 

286 data = butler.get(compNameD, dataId) 

287 self.assertEqual(data, metric.data) 

288 

289 if "counter" in storageClass.derivedComponents: 

290 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId) 

291 self.assertEqual(count, len(data)) 

292 

293 count = butler.get(ref.datasetType.componentTypeName("counter"), dataId, 

294 parameters={"slice": slice(stop)}) 

295 self.assertEqual(count, stop) 

296 

297 compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections) 

298 summary = butler.getDirect(compRef) 

299 self.assertEqual(summary, metric.summary) 

300 

301 # Create a Dataset type that has the same name but is inconsistent. 

302 inconsistentDatasetType = DatasetType(datasetTypeName, dimensions, 

303 self.storageClassFactory.getStorageClass("Config")) 

304 

305 # Getting with a dataset type that does not match registry fails 

306 with self.assertRaises(ValueError): 

307 butler.get(inconsistentDatasetType, dataId) 

308 

309 # Combining a DatasetRef with a dataId should fail 

310 with self.assertRaises(ValueError): 

311 butler.get(ref, dataId) 

312 # Getting with an explicit ref should fail if the id doesn't match 

313 with self.assertRaises(ValueError): 

314 butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101)) 

315 

316 # Getting a dataset with unknown parameters should fail 

317 with self.assertRaises(KeyError): 

318 butler.get(ref, parameters={"unsupported": True}) 

319 

320 # Check we have a collection 

321 collections = set(butler.registry.queryCollections()) 

322 self.assertEqual(collections, expected_collections) 

323 

324 # Clean up to check that we can remove something that may have 

325 # already had a component removed 

326 butler.pruneDatasets([ref], unstore=True, purge=True) 

327 

328 # Add a dataset back in since some downstream tests require 

329 # something to be present 

330 ref = butler.put(metric, refIn) 

331 

332 return butler 

333 

334 def testDeferredCollectionPassing(self): 

335 # Construct a butler with no run or collection, but make it writeable. 

336 butler = Butler(self.tmpConfigFile, writeable=True) 

337 # Create and register a DatasetType 

338 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

339 datasetType = self.addDatasetType("example", dimensions, 

340 self.storageClassFactory.getStorageClass("StructuredData"), 

341 butler.registry) 

342 # Add needed Dimensions 

343 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

344 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

345 "name": "d-r", 

346 "band": "R"}) 

347 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

348 "name": "fourtwentythree", "physical_filter": "d-r"}) 

349 dataId = {"instrument": "DummyCamComp", "visit": 423} 

350 # Create dataset. 

351 metric = makeExampleMetrics() 

352 # Register a new run and put dataset. 

353 run = "deferred" 

354 butler.registry.registerRun(run) 

355 ref = butler.put(metric, datasetType, dataId, run=run) 

356 # Putting with no run should fail with TypeError. 

357 with self.assertRaises(TypeError): 

358 butler.put(metric, datasetType, dataId) 

359 # Dataset should exist. 

360 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

361 # We should be able to get the dataset back, but with and without 

362 # a deferred dataset handle. 

363 self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run])) 

364 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get()) 

365 # Trying to find the dataset without any collection is a TypeError. 

366 with self.assertRaises(TypeError): 

367 butler.datasetExists(datasetType, dataId) 

368 with self.assertRaises(TypeError): 

369 butler.get(datasetType, dataId) 

370 # Associate the dataset with a different collection. 

371 butler.registry.registerCollection("tagged") 

372 butler.registry.associate("tagged", [ref]) 

373 # Deleting the dataset from the new collection should make it findable 

374 # in the original collection. 

375 butler.pruneDatasets([ref], tags=["tagged"]) 

376 self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run])) 

377 

378 

379class ButlerTests(ButlerPutGetTests): 

380 """Tests for Butler. 

381 """ 

382 useTempRoot = True 

383 

384 def setUp(self): 

385 """Create a new butler root for each test.""" 

386 self.root = makeTestTempDir(TESTDIR) 

387 Butler.makeRepo(self.root, config=Config(self.configFile)) 

388 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

389 

390 def testConstructor(self): 

391 """Independent test of constructor. 

392 """ 

393 butler = Butler(self.tmpConfigFile, run="ingest") 

394 self.assertIsInstance(butler, Butler) 

395 

396 collections = set(butler.registry.queryCollections()) 

397 self.assertEqual(collections, {"ingest"}) 

398 

399 butler2 = Butler(butler=butler, collections=["other"]) 

400 self.assertEqual( 

401 butler2.collections, 

402 CollectionSearch.fromExpression(["other"]) 

403 ) 

404 self.assertIsNone(butler2.run) 

405 self.assertIs(butler.datastore, butler2.datastore) 

406 

407 def testBasicPutGet(self): 

408 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

409 self.runPutGetTest(storageClass, "test_metric") 

410 

411 def testCompositePutGetConcrete(self): 

412 

413 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly") 

414 butler = self.runPutGetTest(storageClass, "test_metric") 

415 

416 # Should *not* be disassembled 

417 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

418 self.assertEqual(len(datasets), 1) 

419 uri, components = butler.getURIs(datasets[0]) 

420 self.assertIsInstance(uri, ButlerURI) 

421 self.assertFalse(components) 

422 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

423 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

424 

425 # Predicted dataset 

426 dataId = {"instrument": "DummyCamComp", "visit": 424} 

427 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

428 self.assertFalse(components) 

429 self.assertIsInstance(uri, ButlerURI) 

430 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

431 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

432 

433 def testCompositePutGetVirtual(self): 

434 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

435 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

436 

437 # Should be disassembled 

438 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

439 self.assertEqual(len(datasets), 1) 

440 uri, components = butler.getURIs(datasets[0]) 

441 

442 if butler.datastore.isEphemeral: 

443 # Never disassemble in-memory datastore 

444 self.assertIsInstance(uri, ButlerURI) 

445 self.assertFalse(components) 

446 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

447 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

448 else: 

449 self.assertIsNone(uri) 

450 self.assertEqual(set(components), set(storageClass.components)) 

451 for compuri in components.values(): 

452 self.assertIsInstance(compuri, ButlerURI) 

453 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

454 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

455 

456 # Predicted dataset 

457 dataId = {"instrument": "DummyCamComp", "visit": 424} 

458 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

459 

460 if butler.datastore.isEphemeral: 

461 # Never disassembled 

462 self.assertIsInstance(uri, ButlerURI) 

463 self.assertFalse(components) 

464 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

465 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

466 else: 

467 self.assertIsNone(uri) 

468 self.assertEqual(set(components), set(storageClass.components)) 

469 for compuri in components.values(): 

470 self.assertIsInstance(compuri, ButlerURI) 

471 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

472 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

473 

474 def testIngest(self): 

475 butler = Butler(self.tmpConfigFile, run="ingest") 

476 

477 # Create and register a DatasetType 

478 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"]) 

479 

480 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

481 datasetTypeName = "metric" 

482 

483 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

484 

485 # Add needed Dimensions 

486 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

487 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

488 "name": "d-r", 

489 "band": "R"}) 

490 for detector in (1, 2): 

491 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector, 

492 "full_name": f"detector{detector}"}) 

493 

494 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

495 "name": "fourtwentythree", "physical_filter": "d-r"}, 

496 {"instrument": "DummyCamComp", "id": 424, 

497 "name": "fourtwentyfour", "physical_filter": "d-r"}) 

498 

499 formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter") 

500 dataRoot = os.path.join(TESTDIR, "data", "basic") 

501 datasets = [] 

502 for detector in (1, 2): 

503 detector_name = f"detector_{detector}" 

504 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

505 dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

506 # Create a DatasetRef for ingest 

507 refIn = DatasetRef(datasetType, dataId, id=None) 

508 

509 datasets.append(FileDataset(path=metricFile, 

510 refs=[refIn], 

511 formatter=formatter)) 

512 

513 butler.ingest(*datasets, transfer="copy") 

514 

515 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

516 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

517 

518 metrics1 = butler.get(datasetTypeName, dataId1) 

519 metrics2 = butler.get(datasetTypeName, dataId2) 

520 self.assertNotEqual(metrics1, metrics2) 

521 

522 # Compare URIs 

523 uri1 = butler.getURI(datasetTypeName, dataId1) 

524 uri2 = butler.getURI(datasetTypeName, dataId2) 

525 self.assertNotEqual(uri1, uri2) 

526 

527 # Now do a multi-dataset but single file ingest 

528 metricFile = os.path.join(dataRoot, "detectors.yaml") 

529 refs = [] 

530 for detector in (1, 2): 

531 detector_name = f"detector_{detector}" 

532 dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

533 # Create a DatasetRef for ingest 

534 refs.append(DatasetRef(datasetType, dataId, id=None)) 

535 

536 datasets = [] 

537 datasets.append(FileDataset(path=metricFile, 

538 refs=refs, 

539 formatter=MultiDetectorFormatter)) 

540 

541 butler.ingest(*datasets, transfer="copy") 

542 

543 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

544 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

545 

546 multi1 = butler.get(datasetTypeName, dataId1) 

547 multi2 = butler.get(datasetTypeName, dataId2) 

548 

549 self.assertEqual(multi1, metrics1) 

550 self.assertEqual(multi2, metrics2) 

551 

552 # Compare URIs 

553 uri1 = butler.getURI(datasetTypeName, dataId1) 

554 uri2 = butler.getURI(datasetTypeName, dataId2) 

555 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

556 

557 # Test that removing one does not break the second 

558 # This line will issue a warning log message for a ChainedDatastore 

559 # that uses an InMemoryDatastore since in-memory can not ingest 

560 # files. 

561 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

562 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

563 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

564 multi2b = butler.get(datasetTypeName, dataId2) 

565 self.assertEqual(multi2, multi2b) 

566 

567 def testPruneCollections(self): 

568 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

569 butler = Butler(self.tmpConfigFile, writeable=True) 

570 # Load registry data with dimensions to hang datasets off of. 

571 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

572 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

573 # Add some RUN-type collections. 

574 run1 = "run1" 

575 butler.registry.registerRun(run1) 

576 run2 = "run2" 

577 butler.registry.registerRun(run2) 

578 # put some datasets. ref1 and ref2 have the same data ID, and are in 

579 # different runs. ref3 has a different data ID. 

580 metric = makeExampleMetrics() 

581 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

582 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

583 butler.registry) 

584 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

585 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

586 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

587 

588 # Try to delete a RUN collection without purge, or with purge and not 

589 # unstore. 

590 with self.assertRaises(TypeError): 

591 butler.pruneCollection(run1) 

592 with self.assertRaises(TypeError): 

593 butler.pruneCollection(run2, purge=True) 

594 # Add a TAGGED collection and associate ref3 only into it. 

595 tag1 = "tag1" 

596 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

597 butler.registry.associate(tag1, [ref3]) 

598 # Add a CHAINED collection that searches run1 and then run2. It 

599 # logically contains only ref1, because ref2 is shadowed due to them 

600 # having the same data ID and dataset type. 

601 chain1 = "chain1" 

602 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

603 butler.registry.setCollectionChain(chain1, [run1, run2]) 

604 # Try to delete RUN collections, which should fail with complete 

605 # rollback because they're still referenced by the CHAINED 

606 # collection. 

607 with self.assertRaises(Exception): 

608 butler.pruneCollection(run1, pruge=True, unstore=True) 

609 with self.assertRaises(Exception): 

610 butler.pruneCollection(run2, pruge=True, unstore=True) 

611 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

612 [ref1, ref2, ref3]) 

613 self.assertTrue(butler.datastore.exists(ref1)) 

614 self.assertTrue(butler.datastore.exists(ref2)) 

615 self.assertTrue(butler.datastore.exists(ref3)) 

616 # Try to delete CHAINED and TAGGED collections with purge; should not 

617 # work. 

618 with self.assertRaises(TypeError): 

619 butler.pruneCollection(tag1, purge=True, unstore=True) 

620 with self.assertRaises(TypeError): 

621 butler.pruneCollection(chain1, purge=True, unstore=True) 

622 # Remove the tagged collection with unstore=False. This should not 

623 # affect the datasets. 

624 butler.pruneCollection(tag1) 

625 with self.assertRaises(MissingCollectionError): 

626 butler.registry.getCollectionType(tag1) 

627 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

628 [ref1, ref2, ref3]) 

629 self.assertTrue(butler.datastore.exists(ref1)) 

630 self.assertTrue(butler.datastore.exists(ref2)) 

631 self.assertTrue(butler.datastore.exists(ref3)) 

632 # Add the tagged collection back in, and remove it with unstore=True. 

633 # This should remove ref3 only from the datastore. 

634 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

635 butler.registry.associate(tag1, [ref3]) 

636 butler.pruneCollection(tag1, unstore=True) 

637 with self.assertRaises(MissingCollectionError): 

638 butler.registry.getCollectionType(tag1) 

639 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

640 [ref1, ref2, ref3]) 

641 self.assertTrue(butler.datastore.exists(ref1)) 

642 self.assertTrue(butler.datastore.exists(ref2)) 

643 self.assertFalse(butler.datastore.exists(ref3)) 

644 # Delete the chain with unstore=False. The datasets should not be 

645 # affected at all. 

646 butler.pruneCollection(chain1) 

647 with self.assertRaises(MissingCollectionError): 

648 butler.registry.getCollectionType(chain1) 

649 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

650 [ref1, ref2, ref3]) 

651 self.assertTrue(butler.datastore.exists(ref1)) 

652 self.assertTrue(butler.datastore.exists(ref2)) 

653 self.assertFalse(butler.datastore.exists(ref3)) 

654 # Redefine and then delete the chain with unstore=True. Only ref1 

655 # should be unstored (ref3 has already been unstored, but otherwise 

656 # would be now). 

657 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

658 butler.registry.setCollectionChain(chain1, [run1, run2]) 

659 butler.pruneCollection(chain1, unstore=True) 

660 with self.assertRaises(MissingCollectionError): 

661 butler.registry.getCollectionType(chain1) 

662 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

663 [ref1, ref2, ref3]) 

664 self.assertFalse(butler.datastore.exists(ref1)) 

665 self.assertTrue(butler.datastore.exists(ref2)) 

666 self.assertFalse(butler.datastore.exists(ref3)) 

667 # Remove run1. This removes ref1 and ref3 from the registry (they're 

668 # already gone from the datastore, which is fine). 

669 butler.pruneCollection(run1, purge=True, unstore=True) 

670 with self.assertRaises(MissingCollectionError): 

671 butler.registry.getCollectionType(run1) 

672 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

673 [ref2]) 

674 self.assertTrue(butler.datastore.exists(ref2)) 

675 # Remove run2. This removes ref2 from the registry and the datastore. 

676 butler.pruneCollection(run2, purge=True, unstore=True) 

677 with self.assertRaises(MissingCollectionError): 

678 butler.registry.getCollectionType(run2) 

679 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

680 []) 

681 

682 # Now that the collections have been pruned we can remove the 

683 # dataset type 

684 butler.registry.removeDatasetType(datasetType.name) 

685 

686 def testPickle(self): 

687 """Test pickle support. 

688 """ 

689 butler = Butler(self.tmpConfigFile, run="ingest") 

690 butlerOut = pickle.loads(pickle.dumps(butler)) 

691 self.assertIsInstance(butlerOut, Butler) 

692 self.assertEqual(butlerOut._config, butler._config) 

693 self.assertEqual(butlerOut.collections, butler.collections) 

694 self.assertEqual(butlerOut.run, butler.run) 

695 

696 def testGetDatasetTypes(self): 

697 butler = Butler(self.tmpConfigFile, run="ingest") 

698 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"]) 

699 dimensionEntries = [ 

700 ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"}, 

701 {"instrument": "DummyCamComp"}), 

702 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

703 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}) 

704 ] 

705 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

706 # Add needed Dimensions 

707 for args in dimensionEntries: 

708 butler.registry.insertDimensionData(*args) 

709 

710 # When a DatasetType is added to the registry entries are not created 

711 # for components but querying them can return the components. 

712 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

713 components = set() 

714 for datasetTypeName in datasetTypeNames: 

715 # Create and register a DatasetType 

716 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

717 

718 for componentName in storageClass.components: 

719 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

720 

721 fromRegistry = set(butler.registry.queryDatasetTypes(components=True)) 

722 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

723 

724 # Now that we have some dataset types registered, validate them 

725 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

726 "datasetType.component"]) 

727 

728 # Add a new datasetType that will fail template validation 

729 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

730 if self.validationCanFail: 

731 with self.assertRaises(ValidationError): 

732 butler.validateConfiguration() 

733 

734 # Rerun validation but with a subset of dataset type names 

735 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

736 

737 # Rerun validation but ignore the bad datasetType 

738 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

739 "datasetType.component"]) 

740 

741 def testTransaction(self): 

742 butler = Butler(self.tmpConfigFile, run="ingest") 

743 datasetTypeName = "test_metric" 

744 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

745 dimensionEntries = (("instrument", {"instrument": "DummyCam"}), 

746 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", 

747 "band": "R"}), 

748 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", 

749 "physical_filter": "d-r"})) 

750 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

751 metric = makeExampleMetrics() 

752 dataId = {"instrument": "DummyCam", "visit": 42} 

753 # Create and register a DatasetType 

754 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

755 with self.assertRaises(TransactionTestError): 

756 with butler.transaction(): 

757 # Add needed Dimensions 

758 for args in dimensionEntries: 

759 butler.registry.insertDimensionData(*args) 

760 # Store a dataset 

761 ref = butler.put(metric, datasetTypeName, dataId) 

762 self.assertIsInstance(ref, DatasetRef) 

763 # Test getDirect 

764 metricOut = butler.getDirect(ref) 

765 self.assertEqual(metric, metricOut) 

766 # Test get 

767 metricOut = butler.get(datasetTypeName, dataId) 

768 self.assertEqual(metric, metricOut) 

769 # Check we can get components 

770 self.assertGetComponents(butler, ref, 

771 ("summary", "data", "output"), metric) 

772 raise TransactionTestError("This should roll back the entire transaction") 

773 with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"): 

774 butler.registry.expandDataId(dataId) 

775 # Should raise LookupError for missing data ID value 

776 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

777 butler.get(datasetTypeName, dataId) 

778 # Also check explicitly if Dataset entry is missing 

779 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

780 # Direct retrieval should not find the file in the Datastore 

781 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

782 butler.getDirect(ref) 

783 

784 def testMakeRepo(self): 

785 """Test that we can write butler configuration to a new repository via 

786 the Butler.makeRepo interface and then instantiate a butler from the 

787 repo root. 

788 """ 

789 # Do not run the test if we know this datastore configuration does 

790 # not support a file system root 

791 if self.fullConfigKey is None: 

792 return 

793 

794 # create two separate directories 

795 root1 = tempfile.mkdtemp(dir=self.root) 

796 root2 = tempfile.mkdtemp(dir=self.root) 

797 

798 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

799 limited = Config(self.configFile) 

800 butler1 = Butler(butlerConfig) 

801 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

802 full = Config(self.tmpConfigFile) 

803 butler2 = Butler(butlerConfig) 

804 # Butlers should have the same configuration regardless of whether 

805 # defaults were expanded. 

806 self.assertEqual(butler1._config, butler2._config) 

807 # Config files loaded directly should not be the same. 

808 self.assertNotEqual(limited, full) 

809 # Make sure "limited" doesn't have a few keys we know it should be 

810 # inheriting from defaults. 

811 self.assertIn(self.fullConfigKey, full) 

812 self.assertNotIn(self.fullConfigKey, limited) 

813 

814 # Collections don't appear until something is put in them 

815 collections1 = set(butler1.registry.queryCollections()) 

816 self.assertEqual(collections1, set()) 

817 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

818 

819 # Check that a config with no associated file name will not 

820 # work properly with relocatable Butler repo 

821 butlerConfig.configFile = None 

822 with self.assertRaises(ValueError): 

823 Butler(butlerConfig) 

824 

825 with self.assertRaises(FileExistsError): 

826 Butler.makeRepo(self.root, standalone=True, 

827 config=Config(self.configFile), overwrite=False) 

828 

829 def testStringification(self): 

830 butler = Butler(self.tmpConfigFile, run="ingest") 

831 butlerStr = str(butler) 

832 

833 if self.datastoreStr is not None: 

834 for testStr in self.datastoreStr: 

835 self.assertIn(testStr, butlerStr) 

836 if self.registryStr is not None: 

837 self.assertIn(self.registryStr, butlerStr) 

838 

839 datastoreName = butler.datastore.name 

840 if self.datastoreName is not None: 

841 for testStr in self.datastoreName: 

842 self.assertIn(testStr, datastoreName) 

843 

844 

845class FileDatastoreButlerTests(ButlerTests): 

846 """Common tests and specialization of ButlerTests for butlers backed 

847 by datastores that inherit from FileDatastore. 

848 """ 

849 

850 def checkFileExists(self, root, relpath): 

851 """Checks if file exists at a given path (relative to root). 

852 

853 Test testPutTemplates verifies actual physical existance of the files 

854 in the requested location. 

855 """ 

856 uri = ButlerURI(root, forceDirectory=True) 

857 return uri.join(relpath).exists() 

858 

859 def testPutTemplates(self): 

860 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

861 butler = Butler(self.tmpConfigFile, run="ingest") 

862 

863 # Add needed Dimensions 

864 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

865 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

866 "name": "d-r", 

867 "band": "R"}) 

868 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", 

869 "physical_filter": "d-r"}) 

870 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", 

871 "physical_filter": "d-r"}) 

872 

873 # Create and store a dataset 

874 metric = makeExampleMetrics() 

875 

876 # Create two almost-identical DatasetTypes (both will use default 

877 # template) 

878 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

879 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

880 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

881 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

882 

883 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

884 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

885 

886 # Put with exactly the data ID keys needed 

887 ref = butler.put(metric, "metric1", dataId1) 

888 uri = butler.getURI(ref) 

889 self.assertTrue(self.checkFileExists(butler.datastore.root, 

890 "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"), 

891 f"Checking existence of {uri}") 

892 

893 # Check the template based on dimensions 

894 butler.datastore.templates.validateTemplates([ref]) 

895 

896 # Put with extra data ID keys (physical_filter is an optional 

897 # dependency); should not change template (at least the way we're 

898 # defining them to behave now; the important thing is that they 

899 # must be consistent). 

900 ref = butler.put(metric, "metric2", dataId2) 

901 uri = butler.getURI(ref) 

902 self.assertTrue(self.checkFileExists(butler.datastore.root, 

903 "ingest/metric2/d-r/DummyCamComp_v423.pickle"), 

904 f"Checking existence of {uri}") 

905 

906 # Check the template based on dimensions 

907 butler.datastore.templates.validateTemplates([ref]) 

908 

909 # Now use a file template that will not result in unique filenames 

910 with self.assertRaises(FileTemplateValidationError): 

911 butler.put(metric, "metric3", dataId1) 

912 

913 def testImportExport(self): 

914 # Run put/get tests just to create and populate a repo. 

915 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

916 self.runImportExportTest(storageClass) 

917 

918 @unittest.expectedFailure 

919 def testImportExportVirtualComposite(self): 

920 # Run put/get tests just to create and populate a repo. 

921 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

922 self.runImportExportTest(storageClass) 

923 

924 def runImportExportTest(self, storageClass): 

925 """This test does an export to a temp directory and an import back 

926 into a new temp directory repo. It does not assume a posix datastore""" 

927 exportButler = self.runPutGetTest(storageClass, "test_metric") 

928 print("Root:", exportButler.datastore.root) 

929 # Test that the repo actually has at least one dataset. 

930 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

931 self.assertGreater(len(datasets), 0) 

932 # Add a DimensionRecord that's unused by those datasets. 

933 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

934 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

935 # Export and then import datasets. 

936 with safeTestTempDir(TESTDIR) as exportDir: 

937 exportFile = os.path.join(exportDir, "exports.yaml") 

938 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

939 export.saveDatasets(datasets) 

940 # Export the same datasets again. This should quietly do 

941 # nothing because of internal deduplication, and it shouldn't 

942 # complain about being asked to export the "htm7" elements even 

943 # though there aren't any in these datasets or in the database. 

944 export.saveDatasets(datasets, elements=["htm7"]) 

945 # Save one of the data IDs again; this should be harmless 

946 # because of internal deduplication. 

947 export.saveDataIds([datasets[0].dataId]) 

948 # Save some dimension records directly. 

949 export.saveDimensionData("skymap", [skymapRecord]) 

950 self.assertTrue(os.path.exists(exportFile)) 

951 with safeTestTempDir(TESTDIR) as importDir: 

952 # We always want this to be a local posix butler 

953 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

954 # Calling script.butlerImport tests the implementation of the 

955 # butler command line interface "import" subcommand. Functions 

956 # in the script folder are generally considered protected and 

957 # should not be used as public api. 

958 with open(exportFile, "r") as f: 

959 script.butlerImport(importDir, export_file=f, directory=exportDir, 

960 transfer="auto", skip_dimensions=None, reuse_ids=False) 

961 importButler = Butler(importDir, run="ingest") 

962 for ref in datasets: 

963 with self.subTest(ref=ref): 

964 # Test for existence by passing in the DatasetType and 

965 # data ID separately, to avoid lookup by dataset_id. 

966 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

967 self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")), 

968 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)]) 

969 

970 def testRemoveRuns(self): 

971 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

972 butler = Butler(self.tmpConfigFile, writeable=True) 

973 # Load registry data with dimensions to hang datasets off of. 

974 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

975 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

976 # Add some RUN-type collection. 

977 run1 = "run1" 

978 butler.registry.registerRun(run1) 

979 run2 = "run2" 

980 butler.registry.registerRun(run2) 

981 # put a dataset in each 

982 metric = makeExampleMetrics() 

983 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

984 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

985 butler.registry) 

986 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

987 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

988 uri1 = butler.getURI(ref1, collections=[run1]) 

989 uri2 = butler.getURI(ref2, collections=[run2]) 

990 # Remove from both runs with different values for unstore. 

991 butler.removeRuns([run1], unstore=True) 

992 butler.removeRuns([run2], unstore=False) 

993 # Should be nothing in registry for either one, and datastore should 

994 # not think either exists. 

995 with self.assertRaises(MissingCollectionError): 

996 butler.registry.getCollectionType(run1) 

997 with self.assertRaises(MissingCollectionError): 

998 butler.registry.getCollectionType(run2) 

999 self.assertFalse(butler.datastore.exists(ref1)) 

1000 self.assertFalse(butler.datastore.exists(ref2)) 

1001 # The ref we unstored should be gone according to the URI, but the 

1002 # one we forgot should still be around. 

1003 self.assertFalse(uri1.exists()) 

1004 self.assertTrue(uri2.exists()) 

1005 

1006 

1007class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1008 """PosixDatastore specialization of a butler""" 

1009 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1010 fullConfigKey = ".datastore.formatters" 

1011 validationCanFail = True 

1012 datastoreStr = ["/tmp"] 

1013 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1014 registryStr = "/gen3.sqlite3" 

1015 

1016 def testExportTransferCopy(self): 

1017 """Test local export using all transfer modes""" 

1018 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1019 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1020 # Test that the repo actually has at least one dataset. 

1021 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1022 self.assertGreater(len(datasets), 0) 

1023 uris = [exportButler.getURI(d) for d in datasets] 

1024 datastoreRoot = exportButler.datastore.root 

1025 

1026 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1027 

1028 for path in pathsInStore: 

1029 # Assume local file system 

1030 self.assertTrue(self.checkFileExists(datastoreRoot, path), 

1031 f"Checking path {path}") 

1032 

1033 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1034 with safeTestTempDir(TESTDIR) as exportDir: 

1035 with exportButler.export(directory=exportDir, format="yaml", 

1036 transfer=transfer) as export: 

1037 export.saveDatasets(datasets) 

1038 for path in pathsInStore: 

1039 self.assertTrue(self.checkFileExists(exportDir, path), 

1040 f"Check that mode {transfer} exported files") 

1041 

1042 

1043class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1044 """InMemoryDatastore specialization of a butler""" 

1045 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1046 fullConfigKey = None 

1047 useTempRoot = False 

1048 validationCanFail = False 

1049 datastoreStr = ["datastore='InMemory"] 

1050 datastoreName = ["InMemoryDatastore@"] 

1051 registryStr = "/gen3.sqlite3" 

1052 

1053 def testIngest(self): 

1054 pass 

1055 

1056 

1057class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1058 """PosixDatastore specialization""" 

1059 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1060 fullConfigKey = ".datastore.datastores.1.formatters" 

1061 validationCanFail = True 

1062 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1063 datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1064 "SecondDatastore"] 

1065 registryStr = "/gen3.sqlite3" 

1066 

1067 

1068class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1069 """Test that a yaml file in one location can refer to a root in another.""" 

1070 

1071 datastoreStr = ["dir1"] 

1072 # Disable the makeRepo test since we are deliberately not using 

1073 # butler.yaml as the config name. 

1074 fullConfigKey = None 

1075 

1076 def setUp(self): 

1077 self.root = makeTestTempDir(TESTDIR) 

1078 

1079 # Make a new repository in one place 

1080 self.dir1 = os.path.join(self.root, "dir1") 

1081 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1082 

1083 # Move the yaml file to a different place and add a "root" 

1084 self.dir2 = os.path.join(self.root, "dir2") 

1085 safeMakeDir(self.dir2) 

1086 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1087 config = Config(configFile1) 

1088 config["root"] = self.dir1 

1089 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1090 config.dumpToUri(configFile2) 

1091 os.remove(configFile1) 

1092 self.tmpConfigFile = configFile2 

1093 

1094 def testFileLocations(self): 

1095 self.assertNotEqual(self.dir1, self.dir2) 

1096 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1097 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1098 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1099 

1100 

1101class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1102 """Test that a config file created by makeRepo outside of repo works.""" 

1103 

1104 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1105 

1106 def setUp(self): 

1107 self.root = makeTestTempDir(TESTDIR) 

1108 self.root2 = makeTestTempDir(TESTDIR) 

1109 

1110 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1111 Butler.makeRepo(self.root, config=Config(self.configFile), 

1112 outfile=self.tmpConfigFile) 

1113 

1114 def tearDown(self): 

1115 if os.path.exists(self.root2): 

1116 shutil.rmtree(self.root2, ignore_errors=True) 

1117 super().tearDown() 

1118 

1119 def testConfigExistence(self): 

1120 c = Config(self.tmpConfigFile) 

1121 uri_config = ButlerURI(c["root"]) 

1122 uri_expected = ButlerURI(self.root, forceDirectory=True) 

1123 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1124 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1125 

1126 def testPutGet(self): 

1127 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1128 self.runPutGetTest(storageClass, "test_metric") 

1129 

1130 

1131class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1132 """Test that a config file created by makeRepo outside of repo works.""" 

1133 

1134 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1135 

1136 def setUp(self): 

1137 self.root = makeTestTempDir(TESTDIR) 

1138 self.root2 = makeTestTempDir(TESTDIR) 

1139 

1140 self.tmpConfigFile = self.root2 

1141 Butler.makeRepo(self.root, config=Config(self.configFile), 

1142 outfile=self.tmpConfigFile) 

1143 

1144 def testConfigExistence(self): 

1145 # Append the yaml file else Config constructor does not know the file 

1146 # type. 

1147 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1148 super().testConfigExistence() 

1149 

1150 

1151class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1152 """Test that a config file created by makeRepo outside of repo works.""" 

1153 

1154 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1155 

1156 def setUp(self): 

1157 self.root = makeTestTempDir(TESTDIR) 

1158 self.root2 = makeTestTempDir(TESTDIR) 

1159 

1160 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl() 

1161 Butler.makeRepo(self.root, config=Config(self.configFile), 

1162 outfile=self.tmpConfigFile) 

1163 

1164 

1165@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1166@mock_s3 

1167class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1168 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1169 a local in-memory SqlRegistry. 

1170 """ 

1171 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1172 fullConfigKey = None 

1173 validationCanFail = True 

1174 

1175 bucketName = "anybucketname" 

1176 """Name of the Bucket that will be used in the tests. The name is read from 

1177 the config file used with the tests during set-up. 

1178 """ 

1179 

1180 root = "butlerRoot/" 

1181 """Root repository directory expected to be used in case useTempRoot=False. 

1182 Otherwise the root is set to a 20 characters long randomly generated string 

1183 during set-up. 

1184 """ 

1185 

1186 datastoreStr = [f"datastore={root}"] 

1187 """Contains all expected root locations in a format expected to be 

1188 returned by Butler stringification. 

1189 """ 

1190 

1191 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1192 """The expected format of the S3 Datastore string.""" 

1193 

1194 registryStr = "/gen3.sqlite3" 

1195 """Expected format of the Registry string.""" 

1196 

1197 def genRoot(self): 

1198 """Returns a random string of len 20 to serve as a root 

1199 name for the temporary bucket repo. 

1200 

1201 This is equivalent to tempfile.mkdtemp as this is what self.root 

1202 becomes when useTempRoot is True. 

1203 """ 

1204 rndstr = "".join( 

1205 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1206 ) 

1207 return rndstr + "/" 

1208 

1209 def setUp(self): 

1210 config = Config(self.configFile) 

1211 uri = ButlerURI(config[".datastore.datastore.root"]) 

1212 self.bucketName = uri.netloc 

1213 

1214 # set up some fake credentials if they do not exist 

1215 self.usingDummyCredentials = setAwsEnvCredentials() 

1216 

1217 if self.useTempRoot: 

1218 self.root = self.genRoot() 

1219 rooturi = f"s3://{self.bucketName}/{self.root}" 

1220 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1221 

1222 # need local folder to store registry database 

1223 self.reg_dir = makeTestTempDir(TESTDIR) 

1224 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1225 

1226 # MOTO needs to know that we expect Bucket bucketname to exist 

1227 # (this used to be the class attribute bucketName) 

1228 s3 = boto3.resource("s3") 

1229 s3.create_bucket(Bucket=self.bucketName) 

1230 

1231 self.datastoreStr = f"datastore={self.root}" 

1232 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1233 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1234 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1235 

1236 def tearDown(self): 

1237 s3 = boto3.resource("s3") 

1238 bucket = s3.Bucket(self.bucketName) 

1239 try: 

1240 bucket.objects.all().delete() 

1241 except botocore.exceptions.ClientError as e: 

1242 if e.response["Error"]["Code"] == "404": 

1243 # the key was not reachable - pass 

1244 pass 

1245 else: 

1246 raise 

1247 

1248 bucket = s3.Bucket(self.bucketName) 

1249 bucket.delete() 

1250 

1251 # unset any potentially set dummy credentials 

1252 if self.usingDummyCredentials: 

1253 unsetAwsEnvCredentials() 

1254 

1255 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1256 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1257 

1258 

1259@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1260# Mock required environment variables during tests 

1261@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1262 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1263 TESTDIR, "config/testConfigs/webdav/token"), 

1264 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1265class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1266 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1267 a local in-memory SqlRegistry. 

1268 """ 

1269 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1270 fullConfigKey = None 

1271 validationCanFail = True 

1272 

1273 serverName = "localhost" 

1274 """Name of the server that will be used in the tests. 

1275 """ 

1276 

1277 portNumber = 8080 

1278 """Port on which the webdav server listens. Automatically chosen 

1279 at setUpClass via the _getfreeport() method 

1280 """ 

1281 

1282 root = "butlerRoot/" 

1283 """Root repository directory expected to be used in case useTempRoot=False. 

1284 Otherwise the root is set to a 20 characters long randomly generated string 

1285 during set-up. 

1286 """ 

1287 

1288 datastoreStr = [f"datastore={root}"] 

1289 """Contains all expected root locations in a format expected to be 

1290 returned by Butler stringification. 

1291 """ 

1292 

1293 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1294 """The expected format of the WebdavDatastore string.""" 

1295 

1296 registryStr = "/gen3.sqlite3" 

1297 """Expected format of the Registry string.""" 

1298 

1299 serverThread = None 

1300 """Thread in which the local webdav server will run""" 

1301 

1302 stopWebdavServer = False 

1303 """This flag will cause the webdav server to 

1304 gracefully shut down when True 

1305 """ 

1306 

1307 def genRoot(self): 

1308 """Returns a random string of len 20 to serve as a root 

1309 name for the temporary bucket repo. 

1310 

1311 This is equivalent to tempfile.mkdtemp as this is what self.root 

1312 becomes when useTempRoot is True. 

1313 """ 

1314 rndstr = "".join( 

1315 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1316 ) 

1317 return rndstr + "/" 

1318 

1319 @classmethod 

1320 def setUpClass(cls): 

1321 # Do the same as inherited class 

1322 cls.storageClassFactory = StorageClassFactory() 

1323 cls.storageClassFactory.addFromConfig(cls.configFile) 

1324 

1325 cls.portNumber = cls._getfreeport() 

1326 # Run a local webdav server on which tests will be run 

1327 cls.serverThread = Thread(target=cls._serveWebdav, 

1328 args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), 

1329 daemon=True) 

1330 cls.serverThread.start() 

1331 # Wait for it to start 

1332 time.sleep(3) 

1333 

1334 @classmethod 

1335 def tearDownClass(cls): 

1336 # Ask for graceful shut down of the webdav server 

1337 cls.stopWebdavServer = True 

1338 # Wait for the thread to exit 

1339 cls.serverThread.join() 

1340 

1341 # Mock required environment variables during tests 

1342 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1343 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1344 TESTDIR, "config/testConfigs/webdav/token"), 

1345 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1346 def setUp(self): 

1347 config = Config(self.configFile) 

1348 

1349 if self.useTempRoot: 

1350 self.root = self.genRoot() 

1351 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1352 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1353 

1354 # need local folder to store registry database 

1355 self.reg_dir = makeTestTempDir(TESTDIR) 

1356 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1357 

1358 self.datastoreStr = f"datastore={self.root}" 

1359 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1360 

1361 if not isWebdavEndpoint(self.rooturi): 

1362 raise OSError("Webdav server not running properly: cannot run tests.") 

1363 

1364 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1365 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1366 

1367 # Mock required environment variables during tests 

1368 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1369 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1370 TESTDIR, "config/testConfigs/webdav/token"), 

1371 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1372 def tearDown(self): 

1373 # Clear temporary directory 

1374 ButlerURI(self.rooturi).remove() 

1375 ButlerURI(self.rooturi).session.close() 

1376 

1377 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1378 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1379 

1380 def _serveWebdav(self, port: int, stopWebdavServer): 

1381 """Starts a local webdav-compatible HTTP server, 

1382 Listening on http://localhost:8080 

1383 This server only runs when this test class is instantiated, 

1384 and then shuts down. Must be started is a separate thread. 

1385 

1386 Parameters 

1387 ---------- 

1388 port : `int` 

1389 The port number on which the server should listen 

1390 """ 

1391 root_path = gettempdir() 

1392 

1393 config = { 

1394 "host": "0.0.0.0", 

1395 "port": port, 

1396 "provider_mapping": {"/": root_path}, 

1397 "http_authenticator": { 

1398 "domain_controller": None 

1399 }, 

1400 "simple_dc": {"user_mapping": {"*": True}}, 

1401 "verbose": 0, 

1402 } 

1403 app = WsgiDAVApp(config) 

1404 

1405 server_args = { 

1406 "bind_addr": (config["host"], config["port"]), 

1407 "wsgi_app": app, 

1408 } 

1409 server = wsgi.Server(**server_args) 

1410 server.prepare() 

1411 

1412 try: 

1413 # Start the actual server in a separate thread 

1414 t = Thread(target=server.serve, daemon=True) 

1415 t.start() 

1416 # watch stopWebdavServer, and gracefully 

1417 # shut down the server when True 

1418 while True: 

1419 if stopWebdavServer(): 

1420 break 

1421 time.sleep(1) 

1422 except KeyboardInterrupt: 

1423 print("Caught Ctrl-C, shutting down...") 

1424 finally: 

1425 server.stop() 

1426 t.join() 

1427 

1428 def _getfreeport(): 

1429 """ 

1430 Determines a free port using sockets. 

1431 """ 

1432 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1433 free_socket.bind(('0.0.0.0', 0)) 

1434 free_socket.listen() 

1435 port = free_socket.getsockname()[1] 

1436 free_socket.close() 

1437 return port 

1438 

1439 

1440if __name__ == "__main__": 1440 ↛ 1441line 1440 didn't jump to line 1441, because the condition on line 1440 was never true

1441 unittest.main()