# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import os
import posixpath
import unittest
import unittest.mock  # needed for the patch.dict decorator on the WebDAV test case
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator for when moto's mock_s3 cannot be imported."""
        return cls
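
# boto3, botocore, and moto are optional test dependencies; the S3 datastore
# test case below is skipped entirely when boto3 is unavailable.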

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
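
# cheroot and wsgidav are likewise optional; the WebDAV datastore test case
# below is skipped when WsgiDAVApp is unavailable.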

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
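
# The dict and list arguments above provide the summary, output, and data
# values that the component-retrieval and slicing checks below read back
# (see assertGetComponents and runPutGetTest).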


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass
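
# TransactionTestError is raised inside testTransaction below to guarantee the
# rollback path is exercised without being mistaken for a genuine failure.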


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to the "ingest" run collection, which is
        # also where they will be looked up.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path)
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris,
                                         "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge but not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check whether a file exists at the given path relative to root.

        The testPutTemplates test uses this to verify the actual physical
        existence of files at the requested locations.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp-directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import the datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command-line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # There should be nothing in the registry for either one, and the
        # datastore should not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
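
    # The class attributes above parameterize the checks inherited from
    # ButlerTests: fullConfigKey must appear only in the expanded config
    # (testMakeRepo), validationCanFail gates the expected validation failure
    # in testGetDatasetTypes, and datastoreStr/datastoreName/registryStr are
    # the substrings testStringification looks for.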

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # In-memory datastores cannot ingest files, so skip the inherited test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config directory passed to makeRepo outside of the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, else the Config constructor does not
        # know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config URI passed to makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the registry string."""


    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

1252 

1253 def setUp(self): 

1254 config = Config(self.configFile) 

        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
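        # makeRepo writes the repository config (butler.yaml) at the root,
        # which is where tmpConfigFile points the tests afterwards.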
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; nothing to delete.
                pass
            else:
                raise

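        # An S3 bucket must be empty before it can be deleted, which is why
        # the objects were removed first.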
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests.
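# (unittest.mock.patch.dict restores the original os.environ when each test
# finishes)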
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot is
    False. Otherwise the root is set to a randomly generated 20-character
    string during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """When set to True, this flag causes the webdav server to shut down
    gracefully.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary repo.

        This plays the role of tempfile.mkdtemp: it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the inherited class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which the tests will be run.
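        # _serveWebdav is accessed on the class, so it is a plain unbound
        # function here; cls is passed explicitly to fill its "self" slot.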
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start.
        time.sleep(3)
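        # The fixed sleep is only a crude readiness wait; setUp additionally
        # verifies the endpoint with isWebdavEndpoint before use.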

    @classmethod
    def tearDownClass(cls):
        # Ask for a graceful shutdown of the webdav server.
        cls.stopWebdavServer = True
        # Wait for the thread to exit.
        cls.serverThread.join()

    # Mock required environment variables during tests.
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

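        # Sanity-check that the server thread is actually serving webdav
        # before trying to create a repo there.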
        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests.
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear the temporary directory.
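        # Closing the session afterwards ensures the underlying HTTP
        # connections held for this URI are released.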
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port.

        This server only runs while this test class is instantiated,
        and then shuts down. It must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        """
        root_path = gettempdir()

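        # Minimal WsgiDAVApp configuration: serve root_path at "/" and let
        # the simple domain controller accept any user for these tests.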
        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread.
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer and gracefully shut down the server
            # when it returns True.
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets."""
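        # Binding to port 0 lets the OS assign an ephemeral free port. A
        # small race remains: the port could be taken by another process
        # between closing this socket and the webdav server binding to it.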
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


if __name__ == "__main__":
    unittest.main()