# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

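# When boto3 is unavailable, the S3-backed test case at the bottom of this
# file is skipped via @unittest.skipIf; the no-op mock_s3 defined above only
# keeps the class decoration from failing at import time.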

from lsst.utils import doImport
from lsst.daf.butler.core.safeFileIo import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.location import ButlerURI
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                           unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    """Return a simple MetricsExample with summary, output, and data members."""
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    Butler configurations."""

    root = None

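    # Concrete subclasses are expected to define at least ``configFile``
    # (the butler configuration the suite runs against) and a per-test
    # ``tmpConfigFile``; additional knobs such as ``validationCanFail`` and
    # ``fullConfigKey`` tune individual tests (see the test cases below).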

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

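    # Note: DatasetType.nameWithComponent (used above) composes the component
    # dataset type name from the parent name, e.g. "metric.summary" for the
    # "summary" component of "metric"; the dotted form is the gen3 convention.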

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The run and tagged collections should have been registered when the
        # Butler was constructed.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

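        # Ingest with transfer="copy" so the original files under the test
        # data directory stay in place; testImportExport below exercises a
        # "symlink" transfer instead.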

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are created
        # for each component. We need entries for each component in the test
        # configuration, otherwise validation won't work. The ones that are
        # deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if the Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            self.skipTest("Datastore configuration does not support a file system root")

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing the keys we know it should be
        # inheriting from defaults, while the standalone "full" config has
        # them written out.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        files in the requested location. For PosixDatastore this check is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

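        # The expected paths asserted below suggest that the default template
        # in the test config renders roughly as
        # "{run}/{datasetType}/{physical_filter}/{instrument}_{visit}"; the
        # template itself lives in the config file, so this is inferred from
        # the assertions rather than stated here.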

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); this should not change the template (at least the way
        # we're defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # In-memory datastores cannot ingest files, so skip the inherited
        # ingest test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config directory passed to makeRepo as outfile works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know the
        # file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config URI passed to makeRepo as outfile works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random string of length 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

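        # The class-level @mock_s3 decorator patches boto3, so the bucket
        # created below lives in moto's in-memory S3 implementation and no
        # real AWS endpoint is ever contacted.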

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        files in the requested location. For S3Datastore this check is
        equivalent to an `lsst.daf.butler.core.s3utils.s3CheckFileExists`
        call.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()