Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import time 

34import socket 

35 

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # S3 tests are skipped when boto3/moto are unavailable; keep the
    # names importable so decorated test classes still define cleanly.
    boto3 = None

    def mock_s3(cls):
        """No-op stand-in for ``moto.mock_s3`` when moto is not installed.

        Returns the decorated class unchanged.
        """
        return cls

47 

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    # WebDAV tests are skipped when the optional server deps are missing;
    # a None sentinel lets tests detect that condition.
    WsgiDAVApp = None

53 

54import astropy.time 

55from threading import Thread 

56from tempfile import gettempdir 

57from lsst.utils import doImport 

58from lsst.daf.butler.core.utils import safeMakeDir 

59from lsst.daf.butler import Butler, Config, ButlerConfig 

60from lsst.daf.butler import StorageClassFactory 

61from lsst.daf.butler import DatasetType, DatasetRef 

62from lsst.daf.butler import FileTemplateValidationError, ValidationError 

63from lsst.daf.butler import FileDataset 

64from lsst.daf.butler import CollectionSearch, CollectionType 

65from lsst.daf.butler import ButlerURI 

66from lsst.daf.butler import script 

67from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError 

68from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

69from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials, 

70 unsetAwsEnvCredentials) 

71from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint 

72 

73from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

74from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir 

75 

76TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

77 

78 

def makeExampleMetrics():
    """Return a canned `MetricsExample` used as the payload for put/get tests.

    The instance carries a scalar summary mapping, a nested "output"
    mapping, and a flat data list so composite storage classes have
    every component populated.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

85 

86 

class TransactionTestError(Exception):
    """Dedicated exception for transaction tests.

    Using a unique type prevents misdiagnosing failures that could occur
    if a standard exception class were raised instead.
    """

92 

93 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Verify that searchPaths overrides are applied and logged."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        # With an explicit search path the override directory is used and
        # its use shows up in the debug log.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        # The override must actually change the configuration content.
        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")

112 

113 

class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations.

    Subclasses are expected to provide ``configFile`` and ``tmpConfigFile``
    attributes pointing at a butler configuration.
    """

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType, register it, and return it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Shared factory populated from the test configuration.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        """Assert each named component can be read both directly and via a
        deferred handle, and matches the corresponding attribute of
        ``reference``.
        """
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get round trips for ``storageClass`` under the
        dataset type name ``datasetTypeName``; return the Butler used.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Only the collections created by construction should exist so far.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run, tag}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            this_tag = f"put_tag_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            butler.registry.registerCollection(this_tag, type=CollectionType.TAGGED)
            expected_collections.update({this_run, this_tag})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run, tags=[this_tag])
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref], tags=[this_tag])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_tag)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(this_tag, [ref])
                butler.pruneDatasets([ref], unstore=True, tags=[this_tag])
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_tag)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, tags=[this_tag], run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Cleanup
            for coll in (this_run, this_tag):
                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(coll)
                expected_collections.remove(coll)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

407 

408 

409class ButlerTests(ButlerPutGetTests): 

410 """Tests for Butler. 

411 """ 

412 useTempRoot = True 

413 

414 def setUp(self): 

415 """Create a new butler root for each test.""" 

416 self.root = makeTestTempDir(TESTDIR) 

417 Butler.makeRepo(self.root, config=Config(self.configFile)) 

418 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

419 

420 def testConstructor(self): 

421 """Independent test of constructor. 

422 """ 

423 butler = Butler(self.tmpConfigFile, run="ingest") 

424 self.assertIsInstance(butler, Butler) 

425 

426 collections = set(butler.registry.queryCollections()) 

427 self.assertEqual(collections, {"ingest"}) 

428 

429 butler2 = Butler(butler=butler, collections=["other"]) 

430 self.assertEqual( 

431 butler2.collections, 

432 CollectionSearch.fromExpression(["other"]) 

433 ) 

434 self.assertIsNone(butler2.run) 

435 self.assertIs(butler.registry, butler2.registry) 

436 self.assertIs(butler.datastore, butler2.datastore) 

437 

438 def testBasicPutGet(self): 

439 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

440 self.runPutGetTest(storageClass, "test_metric") 

441 

442 def testCompositePutGetConcrete(self): 

443 

444 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly") 

445 butler = self.runPutGetTest(storageClass, "test_metric") 

446 

447 # Should *not* be disassembled 

448 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

449 self.assertEqual(len(datasets), 1) 

450 uri, components = butler.getURIs(datasets[0]) 

451 self.assertIsInstance(uri, ButlerURI) 

452 self.assertFalse(components) 

453 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

454 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

455 

456 # Predicted dataset 

457 dataId = {"instrument": "DummyCamComp", "visit": 424} 

458 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

459 self.assertFalse(components) 

460 self.assertIsInstance(uri, ButlerURI) 

461 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

462 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

463 

464 def testCompositePutGetVirtual(self): 

465 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

466 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

467 

468 # Should be disassembled 

469 datasets = list(butler.registry.queryDatasets(..., collections="ingest")) 

470 self.assertEqual(len(datasets), 1) 

471 uri, components = butler.getURIs(datasets[0]) 

472 

473 if butler.datastore.isEphemeral: 

474 # Never disassemble in-memory datastore 

475 self.assertIsInstance(uri, ButlerURI) 

476 self.assertFalse(components) 

477 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

478 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

479 else: 

480 self.assertIsNone(uri) 

481 self.assertEqual(set(components), set(storageClass.components)) 

482 for compuri in components.values(): 

483 self.assertIsInstance(compuri, ButlerURI) 

484 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

485 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

486 

487 # Predicted dataset 

488 dataId = {"instrument": "DummyCamComp", "visit": 424} 

489 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

490 

491 if butler.datastore.isEphemeral: 

492 # Never disassembled 

493 self.assertIsInstance(uri, ButlerURI) 

494 self.assertFalse(components) 

495 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

496 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

497 else: 

498 self.assertIsNone(uri) 

499 self.assertEqual(set(components), set(storageClass.components)) 

500 for compuri in components.values(): 

501 self.assertIsInstance(compuri, ButlerURI) 

502 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

503 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

504 

505 def testIngest(self): 

506 butler = Butler(self.tmpConfigFile, run="ingest") 

507 

508 # Create and register a DatasetType 

509 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"]) 

510 

511 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml") 

512 datasetTypeName = "metric" 

513 

514 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

515 

516 # Add needed Dimensions 

517 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

518 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

519 "name": "d-r", 

520 "band": "R"}) 

521 for detector in (1, 2): 

522 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector, 

523 "full_name": f"detector{detector}"}) 

524 

525 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, 

526 "name": "fourtwentythree", "physical_filter": "d-r"}, 

527 {"instrument": "DummyCamComp", "id": 424, 

528 "name": "fourtwentyfour", "physical_filter": "d-r"}) 

529 

530 formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter") 

531 dataRoot = os.path.join(TESTDIR, "data", "basic") 

532 datasets = [] 

533 for detector in (1, 2): 

534 detector_name = f"detector_{detector}" 

535 metricFile = os.path.join(dataRoot, f"{detector_name}.yaml") 

536 dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector} 

537 # Create a DatasetRef for ingest 

538 refIn = DatasetRef(datasetType, dataId, id=None) 

539 

540 datasets.append(FileDataset(path=metricFile, 

541 refs=[refIn], 

542 formatter=formatter)) 

543 

544 butler.ingest(*datasets, transfer="copy") 

545 

546 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423} 

547 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423} 

548 

549 metrics1 = butler.get(datasetTypeName, dataId1) 

550 metrics2 = butler.get(datasetTypeName, dataId2) 

551 self.assertNotEqual(metrics1, metrics2) 

552 

553 # Compare URIs 

554 uri1 = butler.getURI(datasetTypeName, dataId1) 

555 uri2 = butler.getURI(datasetTypeName, dataId2) 

556 self.assertNotEqual(uri1, uri2) 

557 

558 # Now do a multi-dataset but single file ingest 

559 metricFile = os.path.join(dataRoot, "detectors.yaml") 

560 refs = [] 

561 for detector in (1, 2): 

562 detector_name = f"detector_{detector}" 

563 dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector} 

564 # Create a DatasetRef for ingest 

565 refs.append(DatasetRef(datasetType, dataId, id=None)) 

566 

567 datasets = [] 

568 datasets.append(FileDataset(path=metricFile, 

569 refs=refs, 

570 formatter=MultiDetectorFormatter)) 

571 

572 butler.ingest(*datasets, transfer="copy") 

573 

574 dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424} 

575 dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424} 

576 

577 multi1 = butler.get(datasetTypeName, dataId1) 

578 multi2 = butler.get(datasetTypeName, dataId2) 

579 

580 self.assertEqual(multi1, metrics1) 

581 self.assertEqual(multi2, metrics2) 

582 

583 # Compare URIs 

584 uri1 = butler.getURI(datasetTypeName, dataId1) 

585 uri2 = butler.getURI(datasetTypeName, dataId2) 

586 self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}") 

587 

588 # Test that removing one does not break the second 

589 # This line will issue a warning log message for a ChainedDatastore 

590 # that uses an InMemoryDatastore since in-memory can not ingest 

591 # files. 

592 butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False) 

593 self.assertFalse(butler.datasetExists(datasetTypeName, dataId1)) 

594 self.assertTrue(butler.datasetExists(datasetTypeName, dataId2)) 

595 multi2b = butler.get(datasetTypeName, dataId2) 

596 self.assertEqual(multi2, multi2b) 

597 

598 def testPruneCollections(self): 

599 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

600 butler = Butler(self.tmpConfigFile, writeable=True) 

601 # Load registry data with dimensions to hang datasets off of. 

602 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

603 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

604 # Add some RUN-type collections. 

605 run1 = "run1" 

606 butler.registry.registerRun(run1) 

607 run2 = "run2" 

608 butler.registry.registerRun(run2) 

609 # put some datasets. ref1 and ref2 have the same data ID, and are in 

610 # different runs. ref3 has a different data ID. 

611 metric = makeExampleMetrics() 

612 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

613 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

614 butler.registry) 

615 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

616 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

617 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

618 

619 # Add a new dataset type and delete it 

620 tmpName = "prune_collections_disposable" 

621 tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass, 

622 butler.registry) 

623 tmpFromRegistry = butler.registry.getDatasetType(tmpName) 

624 self.assertEqual(tmpDatasetType, tmpFromRegistry) 

625 butler.registry.removeDatasetType(tmpName) 

626 with self.assertRaises(KeyError): 

627 butler.registry.getDatasetType(tmpName) 

628 # Removing a second time is fine 

629 butler.registry.removeDatasetType(tmpName) 

630 

631 # Component removal is not allowed 

632 with self.assertRaises(ValueError): 

633 butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component")) 

634 

635 # Try and fail to delete a datasetType that is associated with data 

636 with self.assertRaises(OrphanedRecordError): 

637 butler.registry.removeDatasetType(datasetType.name) 

638 

639 # Try to delete a RUN collection without purge, or with purge and not 

640 # unstore. 

641 with self.assertRaises(TypeError): 

642 butler.pruneCollection(run1) 

643 with self.assertRaises(TypeError): 

644 butler.pruneCollection(run2, purge=True) 

645 # Add a TAGGED collection and associate ref3 only into it. 

646 tag1 = "tag1" 

647 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

648 butler.registry.associate(tag1, [ref3]) 

649 # Add a CHAINED collection that searches run1 and then run2. It 

650 # logically contains only ref1, because ref2 is shadowed due to them 

651 # having the same data ID and dataset type. 

652 chain1 = "chain1" 

653 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

654 butler.registry.setCollectionChain(chain1, [run1, run2]) 

655 # Try to delete RUN collections, which should fail with complete 

656 # rollback because they're still referenced by the CHAINED 

657 # collection. 

658 with self.assertRaises(Exception): 

659 butler.pruneCollection(run1, pruge=True, unstore=True) 

660 with self.assertRaises(Exception): 

661 butler.pruneCollection(run2, pruge=True, unstore=True) 

662 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

663 [ref1, ref2, ref3]) 

664 self.assertTrue(butler.datastore.exists(ref1)) 

665 self.assertTrue(butler.datastore.exists(ref2)) 

666 self.assertTrue(butler.datastore.exists(ref3)) 

667 # Try to delete CHAINED and TAGGED collections with purge; should not 

668 # work. 

669 with self.assertRaises(TypeError): 

670 butler.pruneCollection(tag1, purge=True, unstore=True) 

671 with self.assertRaises(TypeError): 

672 butler.pruneCollection(chain1, purge=True, unstore=True) 

673 # Remove the tagged collection with unstore=False. This should not 

674 # affect the datasets. 

675 butler.pruneCollection(tag1) 

676 with self.assertRaises(MissingCollectionError): 

677 butler.registry.getCollectionType(tag1) 

678 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

679 [ref1, ref2, ref3]) 

680 self.assertTrue(butler.datastore.exists(ref1)) 

681 self.assertTrue(butler.datastore.exists(ref2)) 

682 self.assertTrue(butler.datastore.exists(ref3)) 

683 # Add the tagged collection back in, and remove it with unstore=True. 

684 # This should remove ref3 only from the datastore. 

685 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

686 butler.registry.associate(tag1, [ref3]) 

687 butler.pruneCollection(tag1, unstore=True) 

688 with self.assertRaises(MissingCollectionError): 

689 butler.registry.getCollectionType(tag1) 

690 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

691 [ref1, ref2, ref3]) 

692 self.assertTrue(butler.datastore.exists(ref1)) 

693 self.assertTrue(butler.datastore.exists(ref2)) 

694 self.assertFalse(butler.datastore.exists(ref3)) 

695 # Delete the chain with unstore=False. The datasets should not be 

696 # affected at all. 

697 butler.pruneCollection(chain1) 

698 with self.assertRaises(MissingCollectionError): 

699 butler.registry.getCollectionType(chain1) 

700 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

701 [ref1, ref2, ref3]) 

702 self.assertTrue(butler.datastore.exists(ref1)) 

703 self.assertTrue(butler.datastore.exists(ref2)) 

704 self.assertFalse(butler.datastore.exists(ref3)) 

705 # Redefine and then delete the chain with unstore=True. Only ref1 

706 # should be unstored (ref3 has already been unstored, but otherwise 

707 # would be now). 

708 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

709 butler.registry.setCollectionChain(chain1, [run1, run2]) 

710 butler.pruneCollection(chain1, unstore=True) 

711 with self.assertRaises(MissingCollectionError): 

712 butler.registry.getCollectionType(chain1) 

713 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

714 [ref1, ref2, ref3]) 

715 self.assertFalse(butler.datastore.exists(ref1)) 

716 self.assertTrue(butler.datastore.exists(ref2)) 

717 self.assertFalse(butler.datastore.exists(ref3)) 

718 # Remove run1. This removes ref1 and ref3 from the registry (they're 

719 # already gone from the datastore, which is fine). 

720 butler.pruneCollection(run1, purge=True, unstore=True) 

721 with self.assertRaises(MissingCollectionError): 

722 butler.registry.getCollectionType(run1) 

723 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

724 [ref2]) 

725 self.assertTrue(butler.datastore.exists(ref2)) 

726 # Remove run2. This removes ref2 from the registry and the datastore. 

727 butler.pruneCollection(run2, purge=True, unstore=True) 

728 with self.assertRaises(MissingCollectionError): 

729 butler.registry.getCollectionType(run2) 

730 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

731 []) 

732 

733 # Now that the collections have been pruned we can remove the 

734 # dataset type 

735 butler.registry.removeDatasetType(datasetType.name) 

736 

737 def testPickle(self): 

738 """Test pickle support. 

739 """ 

740 butler = Butler(self.tmpConfigFile, run="ingest") 

741 butlerOut = pickle.loads(pickle.dumps(butler)) 

742 self.assertIsInstance(butlerOut, Butler) 

743 self.assertEqual(butlerOut._config, butler._config) 

744 self.assertEqual(butlerOut.collections, butler.collections) 

745 self.assertEqual(butlerOut.run, butler.run) 

746 

747 def testGetDatasetTypes(self): 

748 butler = Butler(self.tmpConfigFile, run="ingest") 

749 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"]) 

750 dimensionEntries = [ 

751 ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"}, 

752 {"instrument": "DummyCamComp"}), 

753 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

754 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}) 

755 ] 

756 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

757 # Add needed Dimensions 

758 for args in dimensionEntries: 

759 butler.registry.insertDimensionData(*args) 

760 

761 # When a DatasetType is added to the registry entries are not created 

762 # for components but querying them can return the components. 

763 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

764 components = set() 

765 for datasetTypeName in datasetTypeNames: 

766 # Create and register a DatasetType 

767 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

768 

769 for componentName in storageClass.components: 

770 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

771 

772 fromRegistry = set(butler.registry.queryDatasetTypes(components=True)) 

773 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

774 

775 # Now that we have some dataset types registered, validate them 

776 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

777 "datasetType.component"]) 

778 

779 # Add a new datasetType that will fail template validation 

780 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

781 if self.validationCanFail: 

782 with self.assertRaises(ValidationError): 

783 butler.validateConfiguration() 

784 

785 # Rerun validation but with a subset of dataset type names 

786 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

787 

788 # Rerun validation but ignore the bad datasetType 

789 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

790 "datasetType.component"]) 

791 

792 def testTransaction(self): 

793 butler = Butler(self.tmpConfigFile, run="ingest") 

794 datasetTypeName = "test_metric" 

795 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

796 dimensionEntries = (("instrument", {"instrument": "DummyCam"}), 

797 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", 

798 "band": "R"}), 

799 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", 

800 "physical_filter": "d-r"})) 

801 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

802 metric = makeExampleMetrics() 

803 dataId = {"instrument": "DummyCam", "visit": 42} 

804 # Create and register a DatasetType 

805 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

806 with self.assertRaises(TransactionTestError): 

807 with butler.transaction(): 

808 # Add needed Dimensions 

809 for args in dimensionEntries: 

810 butler.registry.insertDimensionData(*args) 

811 # Store a dataset 

812 ref = butler.put(metric, datasetTypeName, dataId) 

813 self.assertIsInstance(ref, DatasetRef) 

814 # Test getDirect 

815 metricOut = butler.getDirect(ref) 

816 self.assertEqual(metric, metricOut) 

817 # Test get 

818 metricOut = butler.get(datasetTypeName, dataId) 

819 self.assertEqual(metric, metricOut) 

820 # Check we can get components 

821 self.assertGetComponents(butler, ref, 

822 ("summary", "data", "output"), metric) 

823 raise TransactionTestError("This should roll back the entire transaction") 

824 with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"): 

825 butler.registry.expandDataId(dataId) 

826 # Should raise LookupError for missing data ID value 

827 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

828 butler.get(datasetTypeName, dataId) 

829 # Also check explicitly if Dataset entry is missing 

830 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

831 # Direct retrieval should not find the file in the Datastore 

832 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

833 butler.getDirect(ref) 

834 

835 def testMakeRepo(self): 

836 """Test that we can write butler configuration to a new repository via 

837 the Butler.makeRepo interface and then instantiate a butler from the 

838 repo root. 

839 """ 

840 # Do not run the test if we know this datastore configuration does 

841 # not support a file system root 

842 if self.fullConfigKey is None: 

843 return 

844 

845 # create two separate directories 

846 root1 = tempfile.mkdtemp(dir=self.root) 

847 root2 = tempfile.mkdtemp(dir=self.root) 

848 

849 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

850 limited = Config(self.configFile) 

851 butler1 = Butler(butlerConfig) 

852 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

853 full = Config(self.tmpConfigFile) 

854 butler2 = Butler(butlerConfig) 

855 # Butlers should have the same configuration regardless of whether 

856 # defaults were expanded. 

857 self.assertEqual(butler1._config, butler2._config) 

858 # Config files loaded directly should not be the same. 

859 self.assertNotEqual(limited, full) 

860 # Make sure "limited" doesn't have a few keys we know it should be 

861 # inheriting from defaults. 

862 self.assertIn(self.fullConfigKey, full) 

863 self.assertNotIn(self.fullConfigKey, limited) 

864 

865 # Collections don't appear until something is put in them 

866 collections1 = set(butler1.registry.queryCollections()) 

867 self.assertEqual(collections1, set()) 

868 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

869 

870 # Check that a config with no associated file name will not 

871 # work properly with relocatable Butler repo 

872 butlerConfig.configFile = None 

873 with self.assertRaises(ValueError): 

874 Butler(butlerConfig) 

875 

876 with self.assertRaises(FileExistsError): 

877 Butler.makeRepo(self.root, standalone=True, 

878 config=Config(self.configFile), overwrite=False) 

879 

880 def testStringification(self): 

881 butler = Butler(self.tmpConfigFile, run="ingest") 

882 butlerStr = str(butler) 

883 

884 if self.datastoreStr is not None: 

885 for testStr in self.datastoreStr: 

886 self.assertIn(testStr, butlerStr) 

887 if self.registryStr is not None: 

888 self.assertIn(self.registryStr, butlerStr) 

889 

890 datastoreName = butler.datastore.name 

891 if self.datastoreName is not None: 

892 for testStr in self.datastoreName: 

893 self.assertIn(testStr, datastoreName) 

894 

895 

class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        # Note: a leftover debug print of the datastore root was removed
        # here; it only added noise to the test output.
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

1020 

1021 

class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        storeRoot = exportButler.datastore.root

        # Datastore-relative paths for every dataset in the repo.
        pathsInStore = [exportButler.getURI(ref).relative_to(storeRoot) for ref in datasets]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(storeRoot, path),
                            f"Checking path {path}")

        # Each supported local transfer mode must reproduce every file in
        # the export directory.
        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

1056 

1057 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # Deliberately override the inherited ingest test with a no-op for
        # this datastore configuration.
        pass

1070 

1071 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """PosixDatastore specialization"""
    # Chained configuration: an in-memory datastore plus two file datastores.
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@",
                     f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

1081 

1082 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory and relocate its config (with an
        explicit "root" entry) into a second directory.
        """
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocated = Config(originalConfigPath)
        relocated["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocated.dumpToUri(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        """The config must live only in dir2 while the registry database
        stays with the repo root in dir1.
        """
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

1114 

1115 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo in one temp directory while writing its config file
        into a second, unrelated temp directory.
        """
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        cfg = Config(self.tmpConfigFile)
        actualRoot = ButlerURI(cfg["root"])
        expectedRoot = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(actualRoot.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", actualRoot.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

1144 

1145 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Pass a directory (not a file path) as the makeRepo outfile."""
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

1164 

1165 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Pass the makeRepo outfile as a full URI rather than a local path."""
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

1178 

1179 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Bug fix: this was a plain string whose "{bucketName}"/"{root}"
    # placeholders were never substituted; the f-prefix makes the class-level
    # default consistent with datastoreStr above and with the value computed
    # in setUp.
    datastoreName = [f"FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

1272 

1273 

# Environment the webdav Butler client expects; shared by the patch.dict
# decorators below instead of repeating the mapping three times.
_WEBDAV_MOCK_ENV = {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                    "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                        TESTDIR, "config/testConfigs/webdav/token"),
                    "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, _WEBDAV_MOCK_ENV)
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run"""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True
    """

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, _WEBDAV_MOCK_ENV)
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, _WEBDAV_MOCK_ENV)
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Starts a local webdav-compatible HTTP server listening on the
        given port.  This server only runs while this test class is
        instantiated, and then shuts down. Must be started in a separate
        thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen
        stopWebdavServer : callable
            Zero-argument callable polled once per second; the server shuts
            down when it returns `True`.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    # Bug fix: this was a plain function with no self/cls parameter; it only
    # worked because it was always accessed via the class object.  Declare it
    # a staticmethod so the intent is explicit and instance access also works.
    @staticmethod
    def _getfreeport():
        """
        Determines a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port

1453 

1454 

# Allow the test module to be executed directly.
if __name__ == "__main__":
    unittest.main()