Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # boto3/moto are optional test dependencies; when they are unavailable
    # the S3-backed tests fall back to a decorator that does nothing.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Returns its argument unchanged so classes decorated with
        ``@mock_s3`` still import cleanly.
        """
        return cls

45 

# Project (lsst) imports; these require the LSST stack to be set up.
from lsst.utils import doImport
from lsst.daf.butler.core.safeFileIo import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.location import ButlerURI
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

59 

# Absolute path of the directory containing this test file; used to locate
# test configuration and data files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

61 

62 

def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed test values.

    The instance carries a summary dict, a nested structured dict, and a
    list of numbers, so tests can exercise composite components
    ("summary", "output", "data") and slicing parameters.
    """
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )

69 

70 

class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass

76 

77 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Config read with an extra search path should pick up overrides
        and log that the override directory was consulted.
        """
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        # Without search paths the override directory must not appear in
        # the debug log output.
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        # The override file changes this key relative to the default config.
        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")

97 

class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Repository root created by concrete subclasses; None means nothing
    # to clean up in tearDown.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by all tests in the class; populate
        # the factory from the subclass-provided configFile.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component of ``datasetRef`` can be
        retrieved via butler.get and matches the corresponding attribute
        of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the per-test repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/remove round trips for one storage class and
        dataset type name, returning the populated butler for reuse by
        downstream tests.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest", })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    # NOTE(review): the scraped source lost the header line of this method
    # (the code below followed an unconditional ``return butler``, so it must
    # belong to a separate test). The name matches the upstream daf_butler
    # test suite — confirm against the original file.
    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))

342 

343 

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # When True, setUp creates a fresh temporary repo root per test.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # A butler constructed from another butler shares registry and
        # datastore but may override the collection.
        butler2 = Butler(butler=butler, collection="other")
        self.assertEqual(butler2.collection, "other")
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip with a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip with a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Ingest external files, both one-file-per-dataset and multiple
        datasets sharing a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs; both datasets come from the same file.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collection, butler.collection)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Register several dataset types and validate the configuration
        against them.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = butler.registry.getAllDatasetTypes()
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """A failed transaction should roll back registry and datastore
        changes together.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass,
                                                  butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = butler1.registry.getAllCollections()
        self.assertEqual(collections1, set())
        self.assertEqual(butler2.registry.getAllCollections(), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """str(butler) should mention the datastore and registry the
        subclass expects.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

640 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """File templates should control the physical location of stored
        datasets and be validated against the registered dimensions.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Datasets exported from one repo should be importable into a
        fresh repo and remain findable there.
        """
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

740 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    # Butler configuration used to build the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Config key expected to be present in the fully-expanded configuration.
    fullConfigKey = ".datastore.formatters"
    # This configuration can legitimately fail datastore validation.
    validationCanFail = True
    # Substrings expected in the stringified datastore and registry.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

749 

750 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    # An in-memory repository does not need a temporary directory on disk.
    useTempRoot = False
    validationCanFail = False
    # Substrings expected in the stringified datastore and registry.
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Ingesting external files does not apply to an in-memory
        # datastore, so the inherited test is deliberately disabled.
        pass

763 

764 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler.

    Chains an in-memory datastore with two Posix datastores.
    """
    # NOTE: the docstring previously said "PosixDatastore specialization",
    # an apparent copy-paste from the Posix test case; this class tests a
    # chained configuration.
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Formatters key of the second datastore in the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Substrings expected in the stringified datastore and registry.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

774 

775 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create a repository in the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the yaml config to a second directory, recording the
        # true repository location under its "root" key.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigFile = os.path.join(self.dir1, "butler.yaml")
        relocatedConfig = Config(originalConfigFile)
        relocatedConfig["root"] = self.dir1
        movedConfigFile = os.path.join(self.dir2, "butler2.yaml")
        relocatedConfig.dumpToFile(movedConfigFile)
        os.remove(originalConfigFile)
        self.tmpConfigFile = movedConfigFile

    def testFileLocations(self):
        # The config must exist only in the second directory while the
        # registry database stays with the repository in the first.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

807 

808 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Ask makeRepo to write the repository config outside the repo root.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the extra directory holding the external config before
        # deferring to the base-class cleanup of self.root.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        # The "root" stored in the external config must resolve to the
        # repository root when both are compared as URIs.
        savedConfig = Config(self.tmpConfigFile)
        actualUri = ButlerURI(savedConfig["root"])
        expectedUri = ButlerURI(self.root)
        self.assertEqual(actualUri.geturl(), expectedUri.geturl())
        self.assertNotIn(":", actualUri.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

837 

838 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test makeRepo writing its config when outfile is a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Give makeRepo a directory as outfile; it should place the
        # configuration file inside it.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

857 

858 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test makeRepo writing its config when outfile is given as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a full URI (not a plain path) for the config file.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

871 

872 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # FIX: the f-prefix was missing, so the braces stayed literal instead of
    # interpolating the class attributes (compare datastoreStr above).
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # FIX: was an f-string with no placeholders; a plain literal suffices.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random string of len 20 (plus a trailing slash) to serve
        as a root name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        alphabet = string.ascii_uppercase + string.digits
        return "".join(random.choices(alphabet, k=20)) + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        # The bucket name comes from the config file, not the class default.
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        # The bucket must be emptied (above) before it can be deleted.
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        # FIX: relpath was previously ignored, so only the root URI was ever
        # tested; join it so the requested file itself is checked.
        uri = ButlerURI(posixpath.join(root, relpath))
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Transfer on export/import is not yet supported for S3.
        super().testImportExport()

973 

974 

if __name__ == "__main__":
    unittest.main()