# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import unittest.mock  # needed for unittest.mock.patch.dict below
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
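
# The two try/except fallbacks above let this module import cleanly when the
# optional AWS (boto3/moto) or webdav (cheroot/wsgidav) test dependencies are
# absent: the corresponding test classes below are skipped via
# unittest.skipIf, and the stand-in mock_s3 decorator simply returns the
# class unchanged.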

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))
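

# MetricsExample (from lsst.daf.butler.tests) is the test payload used
# throughout this file.  As exercised here, it bundles three pieces: a
# "summary" mapping, an "output" mapping and a "data" list, which the
# component and parameter tests below retrieve and compare.  A rough,
# hypothetical sketch of an equivalent container (for orientation only):
#
#     class MetricsExample:
#         def __init__(self, summary, output, data):
#             self.summary = summary   # e.g. {"AM1": 5.2, "AM2": 30.6}
#             self.output = output     # nested mapping
#             self.data = data         # list; supports the "slice" parameter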

def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, used to avoid the
    misdiagnosis that can occur when a standard exception is raised
    instead.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by the other
    test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
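
        # searchPaths directories are consulted before the packaged defaults,
        # so a config file in the override directory replaces the matching
        # entry (here the datastore records table name) without touching
        # anything else; the DEBUG log capture confirms which directories
        # were actually searched.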


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
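
    # Concrete subclasses are expected to provide at least ``configFile``
    # (consumed by setUpClass) and ``tmpConfigFile`` (consumed whenever a
    # Butler is constructed); see ButlerTests.setUp below for one way the
    # latter is derived from the former.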

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The run and tag collections should have been registered by the
        # Butler constructor.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})
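
        # Dimension records must exist before a data ID that references them
        # can be used: the instrument, physical_filter and visit rows
        # inserted above back the dataIds used for put/get below.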

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))
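
        # The pruneDatasets calls above form a ladder:
        #   pruneDatasets([ref])                           only disassociates
        #       the ref from the tagged collection;
        #   pruneDatasets([ref], unstore=True)             also deletes the
        #       artifact from the datastore;
        #   pruneDatasets([ref], purge=True, unstore=True) also removes the
        #       registry entry, so even getDataset(ref.id) returns None.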

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
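
        # In all three spellings the "slice" parameter is applied on read,
        # so only the list-like "data" attribute comes back shortened;
        # summary and output are unchanged, as asserted above.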

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we still have the run and tag collections
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
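
    # Note the pattern above: a Butler constructed without a default run can
    # still write as long as each put() names a run explicitly, and each
    # read names the collections to search; omitting either is a TypeError.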


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)
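
        # Construction from an existing butler shares the registry and
        # datastore instances (asserted above), making this a cheap way to
        # get a view with different default collections.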

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
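
        # getURIs thus has a two-sided contract: for an assembled dataset it
        # returns (primary_uri, {}), while for a disassembled composite it
        # returns (None, {component_name: uri, ...}); predicted (not yet
        # written) locations are flagged with a "predicted" URI fragment.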

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")
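
        # ingest() consumes FileDataset entries (path + refs + formatter);
        # with transfer="copy" the files are copied into the datastore as
        # part of registering the refs.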

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Add a new dataset type and delete it
        tmpName = "prune_collections_disposable"
        tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
                                             butler.registry)
        tmpFromRegistry = butler.registry.getDatasetType(tmpName)
        self.assertEqual(tmpDatasetType, tmpFromRegistry)
        butler.registry.removeDatasetType(tmpName)
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(tmpName)
        # Removing a second time is fine
        butler.registry.removeDatasetType(tmpName)

        # Component removal is not allowed
        with self.assertRaises(ValueError):
            butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))

        # Try and fail to delete a datasetType that is associated with data
        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)
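
        # removeDatasetType is idempotent for unused dataset types, refuses
        # component dataset types, and raises OrphanedRecordError while
        # datasets of that type still exist, as exercised above.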

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))

        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)
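
        # In short: pruneCollection refuses to purge TAGGED or CHAINED
        # collections and refuses to drop a RUN that a chain still
        # references; unstore=True removes the member datasets' artifacts
        # from the datastore, and purge=True (RUN only, with unstore) also
        # deletes the registry entries.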

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, no entries are created
        # for its components, but querying with components=True can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
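
        # Everything inside butler.transaction() -- the dimension inserts,
        # the registry entry and the datastore write performed by put() --
        # is rolled back when the exception propagates, which the four
        # checks above verify from different angles.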

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path, relative to ``root``.

        The testPutTemplates test verifies the actual physical existence of
        files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
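
        # File templates map a ref's data ID into a path inside the
        # datastore; validateTemplates() checks a template against the
        # dimensions of the given refs, and a template that cannot produce
        # unique filenames for distinct data IDs is rejected with
        # FileTemplateValidationError, as for "metric3" above.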

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new temp
        directory repo.  Does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with tempfile.TemporaryDirectory() as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.
                # Functions in the script folder are generally considered
                # protected and should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"
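
    # In-memory datastores cannot ingest external files, so the inherited
    # ingest test is deliberately disabled here.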

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is given as a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is given as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()
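
    # The mock_s3 decorator from moto intercepts all boto3 calls made by
    # these tests, so no real AWS endpoint or credentials are touched; the
    # dummy credentials set in setUp exist only to satisfy boto3's client
    # configuration.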


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens.  Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary webdav repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the given
        port on localhost.  This server only runs while this test class is
        being exercised, and then shuts down.  Must be started in a separate
        thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : callable
            Zero-argument callable polled once a second; the server shuts
            down gracefully when it returns True.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port
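
    # Note: _getfreeport closes the probe socket before the server rebinds
    # the port, so there is a small race window; in practice this is
    # acceptable for a single-process test run.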


if __name__ == "__main__":
    unittest.main()