Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import numpy as np 

34 

# moto/boto3 are optional test dependencies: if they are unavailable the
# S3-backed tests are skipped, but the module must still import cleanly.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Sentinel used elsewhere to detect that S3 support is unavailable.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

46 

47from lsst.utils import doImport 

48from lsst.daf.butler.core.safeFileIo import safeMakeDir 

49from lsst.daf.butler import Butler, Config, ButlerConfig 

50from lsst.daf.butler import StorageClassFactory 

51from lsst.daf.butler import DatasetType, DatasetRef 

52from lsst.daf.butler import FileTemplateValidationError, ValidationError 

53from lsst.daf.butler import FileDataset 

54from lsst.daf.butler import CollectionSearch 

55from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

56from lsst.daf.butler.core.location import ButlerURI 

57from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

58 unsetAwsEnvCredentials) 

59 

60from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

61 

62TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

63 

64 

def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values.

    The three constructor arguments are a summary mapping, a structured
    output mapping, and a list of data values.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

71 

72 

class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

78 

79 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Verify that extra search paths change which config is loaded."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without an override directory the testConfigs path must not be
        # mentioned in the debug log.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCapture:
            baseConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logCapture.output))

        # With the override directory supplied as a search path it should
        # appear in the debug log.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCapture:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logCapture.output))

        # The override directory must have changed this config value.
        key = ("datastore", "records", "table")
        self.assertNotEqual(baseConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")

98 

99 

class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Repository root directory; created per-test by subclasses' setUp and
    # removed by tearDown below.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Build the shared StorageClassFactory from the subclass-provided
        # butler configuration file.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through the butler
        matches the corresponding attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the per-test repository root if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for the given storage class
        and dataset type name, returning the populated Butler.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Butler construction should have registered the run and tagged
        # collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Test that collections can be supplied per-call instead of at
        Butler construction time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

347 

348 

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # Whether setUp should create a fresh temporary repository root;
    # in-memory subclasses set this to False.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # A butler built from another butler shares registry and datastore
        # but may override collections.
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip with a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip with a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Test ingest of external files, both one-file-per-dataset and
        multiple datasets sharing a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from the same file so the URIs
        # should match.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Test dataset type registration and configuration validation."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Test that a failed transaction rolls back registry and datastore
        changes completely.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # After rollback nothing inserted inside the transaction remains.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """Test that str(butler) mentions the expected datastore and
        registry identifiers.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

643 

644 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Test that file templates place datasets at the expected paths and
        that template validation catches a non-unique template.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        # np.int64 exercises non-builtin integer types in data IDs.
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second put with the non-unique template must collide on disk.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Test exporting datasets from one repo and importing them into a
        freshly-created repo via symlink transfer.
        """
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

743 

744 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration used to construct the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key expected to be present in the expanded butler configuration
    # (consumed by the shared harness in the base class — see base class).
    fullConfigKey = ".datastore.formatters"
    # Configuration validation is expected to be able to fail for this
    # datastore (exercised by base-class validation tests).
    validationCanFail = True
    # Substrings expected in the stringified Butler's datastore description.
    datastoreStr = ["/tmp"]
    # Expected datastore name(s).
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    # Substring expected in the stringified registry description.
    registryStr = "/gen3.sqlite3"

753 

754 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No config key to check for this datastore.
    fullConfigKey = None
    # An in-memory datastore needs no temporary on-disk root.
    useTempRoot = False
    # Validation is not expected to fail for the in-memory configuration.
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Deliberate no-op override: disables the inherited ingest test
        # (ingest presumably involves external files on disk, which does not
        # apply to an in-memory datastore — see base class).
        pass

767 

768 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler.

    Per the expected strings below, the chain combines an in-memory
    datastore with two posix datastores.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Formatters key of the second datastore in the chain; must be present in
    # the expanded configuration.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Substrings expected in the stringified datastore: the in-memory child
    # plus the two posix children.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

778 

779 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # The makeRepo config-key check is disabled because this test
    # deliberately does not use butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory and relocate its config to another."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Build a brand-new repository in the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the config: write a copy (with an explicit "root" entry
        # pointing back at the repo) under a different name in a second
        # directory, then delete the original so only the relocated copy
        # remains.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigPath)
        relocatedConfig["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        """Config must live in dir2 while the repo contents stay in dir1."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

811 

812 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo in one directory with its config written to another."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Clean up the directory holding the external config as well.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """The external config's root must point back at the actual repo."""
        savedConfig = Config(self.tmpConfigFile)
        actualRootUri = ButlerURI(savedConfig["root"])
        expectedRootUri = ButlerURI(self.root)
        self.assertEqual(actualRootUri.geturl(), expectedRootUri.geturl())
        # Guard against a URI scheme being glued onto a plain filesystem path.
        self.assertNotIn(":", actualRootUri.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        """Run the standard put/get cycle against the externally-configured repo."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

841 

842 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo whose outfile target is a bare directory, not a file."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)
        self.tmpConfigFile = self.root2

        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # The Config constructor infers the file format from the extension,
        # so the butler.yaml inside the output directory must be appended
        # before delegating to the base-class check.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

861 

862 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo whose outfile target is expressed as a URI."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a full URI rather than a filesystem path.
        outfilePath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(outfilePath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

875 

876 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Class-level default only; setUp() replaces it with the real root URI.
    # Fixed: this was a plain string containing the literal text
    # "{bucketName}"/"{root}" — the f-prefix was missing, unlike datastoreStr
    # just above.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Fixed: was an f-string with no placeholders (f":memory:").
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        # Trailing slash so the result behaves like a directory prefix.
        return rndstr + "/"

    def setUp(self):
        """Create a mock S3 bucket and make a Butler repo rooted inside it."""
        config = Config(self.configFile)
        # The bucket name comes from the datastore root URI in the config.
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mock bucket; drop any dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            # A 404 means the keys were already unreachable; anything else
            # is a genuine failure and must propagate.
            if e.response["Error"]["Code"] != "404":
                raise

        # Reuse the handle; a second, redundant s3.Bucket() call was removed.
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        # NOTE(review): relpath is not used — only the root URI is checked.
        # Confirm whether relpath should be joined onto root here.
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # NOTE(review): marked expectedFailure — presumably because the base
        # test transfers with "symlink", which has no S3 equivalent; confirm.
        super().testImportExport()

977 

978 

# Allow the test suite to be run directly as a script.
if __name__ == "__main__":
    unittest.main()