Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33 

# Optional AWS test dependencies. When boto3/botocore/moto are not installed,
# set ``boto3 = None`` (used elsewhere as the "S3 support unavailable" sentinel)
# and provide a stand-in for moto's ``mock_s3`` class decorator so that
# decorated test classes still import cleanly.
# NOTE(review): ``botocore`` is left unbound on the fallback path — presumably
# it is only referenced when ``boto3 is not None``; verify against later usage.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Returns the decorated class unchanged.
        """
        return cls

45 

46from lsst.utils import doImport 

47from lsst.daf.butler.core.safeFileIo import safeMakeDir 

48from lsst.daf.butler import Butler, Config, ButlerConfig 

49from lsst.daf.butler import StorageClassFactory 

50from lsst.daf.butler import DatasetType, DatasetRef 

51from lsst.daf.butler import FileTemplateValidationError, ValidationError 

52from lsst.daf.butler import FileDataset 

53from lsst.daf.butler import CollectionSearch 

54from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG 

55from lsst.daf.butler.core.location import ButlerURI 

56from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials, 

57 unsetAwsEnvCredentials) 

58 

59from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample 

60 

61TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

62 

63 

def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values.

    Used as the standard dataset payload throughout these tests.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

70 

71 

class TransactionTestError(Exception):
    """Dedicated exception for exercising transaction rollback in tests.

    Raising a purpose-specific error type prevents the misdiagnosis that
    might otherwise occur when a standard exception is used.
    """

77 

78 

79class ButlerConfigTests(unittest.TestCase): 

80 """Simple tests for ButlerConfig that are not tested in other test cases. 

81 """ 

82 

83 def testSearchPath(self): 

84 configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml") 

85 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

86 config1 = ButlerConfig(configFile) 

87 self.assertNotIn("testConfigs", "\n".join(cm.output)) 

88 

89 overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs") 

90 with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm: 

91 config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory]) 

92 self.assertIn("testConfigs", "\n".join(cm.output)) 

93 

94 key = ("datastore", "records", "table") 

95 self.assertNotEqual(config1[key], config2[key]) 

96 self.assertEqual(config2[key], "override_record") 

97 

98 

class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Filesystem root of the test repository.  Subclasses assign it in
    # ``setUp`` (see ButlerTests); ``tearDown`` removes it when set.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it

        Parameters are forwarded directly to the `DatasetType` constructor;
        the new type is registered with ``registry`` and returned.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Build a shared StorageClassFactory from the subclass-provided
        # ``configFile`` once per test class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component of ``datasetRef`` can be retrieved
        via ``butler.get`` and equals the matching attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for a dataset of the given
        storage class, returning the butler so callers can keep testing.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the butler should have registered exactly the run and
        # tag collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.prune([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.prune([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.prune([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.prune([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.prune([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check collection arguments passed per-call instead of at butler
        construction time."""
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.prune([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

346 

347 

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # When True, each test gets a fresh temporary repository root under
    # TESTDIR; subclasses (e.g. the in-memory case) may disable this.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            # No filesystem root needed; use the class-level config directly.
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # A butler built from another butler shares registry and datastore
        # but can override collections; it has no default run.
        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip with a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip with a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Ingest external files, both one-file-per-dataset and
        multiple-datasets-per-file, and prune one afterwards."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: separate source files must yield separate URIs.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets came from the same file, so the URIs
        # must match.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        # Round-tripped butler must preserve config, collections and run.
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Register several dataset types and validate the configuration
        against them."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Check that an exception inside a butler transaction rolls back all
        registry and datastore changes made within it."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        # After rollback, nothing created inside the transaction may remain.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        # Recreate the repo in standalone mode (defaults expanded in place).
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        # Without overwrite=True, remaking an existing repo must fail.
        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        """Check str(butler) against the subclass-declared expected
        datastore/registry substrings."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

642 

643 

class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Verify that file templates place datasets at the expected paths
        and that a non-unique template is rejected."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second put with a colliding filename must be refused.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Round-trip datasets through export to a YAML file and import into
        a brand-new repository."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))

742 

743 

class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    # Butler configuration used to build the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key expected to be present in the full expanded configuration.
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    # Substrings expected in the stringified datastore and registry.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

752 

753 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No on-disk configuration key to verify for this datastore.
    fullConfigKey = None
    # Nothing is written to disk, so a temporary root is unnecessary.
    useTempRoot = False
    validationCanFail = False
    # Substrings expected in the stringified datastore and registry.
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Deliberately disabled for this datastore; presumably an
        # in-memory datastore cannot ingest external files -- confirm
        # against the base-class test.
        pass

766 

767 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler.

    Chains an in-memory datastore with two posix datastores (see the
    expected datastore strings below).
    """
    # NOTE(review): the previous docstring said "PosixDatastore
    # specialization", copied from PosixDatastoreButlerTestCase; this
    # class tests the chained configuration.
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Index "1" addresses the second datastore in the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Substrings expected in the stringified datastore and registry.
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

777 

778 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.dir1 = os.path.join(self.root, "dir1")
        self.dir2 = os.path.join(self.root, "dir2")

        # Create a repository in the first directory.
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the yaml file to the second directory under a
        # different name, recording the repository location in "root",
        # then remove the original so only the relocated copy remains.
        safeMakeDir(self.dir2)
        originalConfigFile = os.path.join(self.dir1, "butler.yaml")
        config = Config(originalConfigFile)
        config["root"] = self.dir1
        relocatedConfigFile = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(relocatedConfigFile)
        os.remove(originalConfigFile)
        self.tmpConfigFile = relocatedConfigFile

    def testFileLocations(self):
        # Config and repository contents must live in different places:
        # the yaml only in dir2, the registry database only in dir1.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

810 

811 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Write the repository configuration outside the repo root,
        # under a non-default file name.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        config = Config(self.tmpConfigFile)
        actualUri = ButlerURI(config["root"])
        expectedUri = ButlerURI(self.root)
        self.assertEqual(actualUri.geturl(), expectedUri.geturl())
        # Guard against a URI being concatenated with a plain
        # filesystem path.
        self.assertNotIn(":", actualUri.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

840 

841 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a directory (not a file path) as the outfile
        # destination.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the
        # file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

860 

861 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Hand makeRepo a full URI rather than a plain filesystem path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

874 

875 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this was a plain string with literal "{bucketName}"/"{root}"
    # braces; made an f-string like its siblings. (setUp overrides it
    # with the actual root URI anyway.)
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        # Trailing slash so the value composes like a directory prefix.
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        # The bucket name comes from the config file, not the class
        # attribute default.
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        # Reuse the Bucket handle from above; a bucket must be empty
        # before it can be deleted.
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        # Fixed: relpath was previously ignored, so the check only ever
        # looked at the root itself rather than the requested file.
        uri = ButlerURI(posixpath.join(root, relpath))
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()

976 

977 

if __name__ == "__main__":
    # Run the full test suite when executed as a script.
    unittest.main()