# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import logging
import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported.
        """
        return cls
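
# ``mock_s3`` (the real moto decorator or the no-op fallback above) is
# presumably applied as a class decorator to S3-backed test cases elsewhere
# in this module; the fallback keeps the module importable without moto.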

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
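
# The three positional arguments above populate the ``summary``, ``output``
# and ``data`` attributes of MetricsExample; the put/get tests below read
# these back both directly and as dataset components.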


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior not covered by other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
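
        # As the assertions above show, values found via ``searchPaths``
        # take precedence over those in the default configuration.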


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests against different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to a run collection, and subsequent
        # lookups will search that run.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)
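        # (A ref with id=None is unresolved, which is what put() expects;
        # the resolved-ref failure mode is exercised immediately below.)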

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # And deferred direct with a ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ButlerURI(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts([ref], destination,
                                                           preserve_path=preserve_path, transfer="copy")
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ButlerURI.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                     f" {artifacts} vs {primary_uri} and {secondary_uris}")

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                     preserve_path=preserve_path,
                                                                     overwrite=True)
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run collection
            # is now empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
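
        # All three routes (parameters passed to get(), to getDeferred(), or
        # to the deferred handle's own get()) must apply the slice
        # identically.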

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
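        # Tagged-collection membership and run membership are independent:
        # pruning the tag association leaves the dataset intact in its run.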


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
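
        # The "predicted" fragment marks a URI for a dataset that was never
        # actually written; visit 424 has no stored dataset.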

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
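
        # With disassembly there is no single primary file: getURIs returns
        # None for the primary URI and one URI per component instead.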

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")
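
        # Both data IDs resolve to the same single ingested file, which is
        # why the two URIs compare equal here.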

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
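
        # The TransactionTestError rolled everything back together: the
        # dimension records, the registry dataset entry and the stored file.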

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                         "id": 1, "full_name": "det1"})

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                             "id": i, "obs_id": f"exp{i}",
                                                             "seq_num": i, "day_obs": dayobs,
                                                             "physical_filter": "d-r"})

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "detector": 1, "instrument": "DummyCamComp",
                      "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
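
        # ("metric3" is presumably configured in the test config with a file
        # template that cannot produce unique filenames for these dimensions,
        # which is why template validation rejects the put.)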

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())
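
        # removeRuns with unstore=True deleted the underlying file, while
        # unstore=False only forgot it, leaving the artifact on disk.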


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for the ref2 file.
        self.assertFalse(uri2.exists())
        # But ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())
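
        # In trust mode the datastore deletes files even when their records
        # are already gone: ref2 went immediately at trash time, ref3 only
        # when the trash was emptied.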

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        pass
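        # (In-memory datastores cannot ingest files, so the inherited ingest
        # test is deliberately a no-op here.)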


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

1258 

1259 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root".
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo
    works.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo in a directory outside
    of the repo works.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, else the Config constructor does not
        # know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo at a URI outside
    of the repo works.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This plays the role of tempfile.mkdtemp, since this is what
        self.root becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # Moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)
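        # The bucket created above exists only in moto's in-memory backend
        # (via the @mock_s3 decorator), so no real AWS resources are touched.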
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable, so there is nothing to delete.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary repo.

        This plays the role of tempfile.mkdtemp, since this is what
        self.root becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the inherited class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server against which the tests will be run.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start.
        time.sleep(3)
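        # A fixed sleep is only a heuristic; if startup races are ever seen,
        # polling the endpoint (e.g. with isWebdavEndpoint) until it responds
        # would be a more robust way to wait for the server.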

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server.
        cls.stopWebdavServer = True
        # Wait for the thread to exit.
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear the temporary directory.
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server,
        listening on http://localhost:port.

        This server only runs while this test class is instantiated,
        and then shuts down. Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : `callable`
            A zero-argument callable that returns `True` when the server
            should shut down.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread.
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when it becomes True.
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets."""
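        # Note: this is inherently racy; another process could claim the port
        # between close() here and the server binding it in setUpClass. It is
        # adequate for a single-host test fixture, though.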
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


class PosixDatastoreTransfers(unittest.TestCase):
    """Test data transfers between butlers.

    Tests run for different dataset managers. UUID to UUID and integer to
    integer transfers are tested. UUID to integer is not supported since we
    do not currently want to allow that. Integer to UUID is supported, with
    the caveat that UUID4s will be generated, which is incorrect for raw
    dataset types; the test ignores that.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config(self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, manager, label):
        config = Config(self.configFile)
        config["registry", "managers", "datasets"] = manager
        return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
                      writeable=True)

    def create_butlers(self, manager1, manager2):
        self.source_butler = self.create_butler(manager1, "1")
        self.target_butler = self.create_butler(manager2, "2")

    def testTransferUuidToUuid(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )
        # Setting id_gen_map should have no effect here.
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def testTransferIntToInt(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            )
        # An int dataset ID only allows UNIQUE id generation.
        self.assertButlerTransfers()

    def testTransferIntToUuid(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
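        # Here DATAID_TYPE asks the target butler to mint deterministic ids
        # derived from the dataset type plus data ID (rather than random
        # UUID4s) for "random_data_2"; see DatasetIdGenEnum for the options.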

    def testTransferMissing(self):
        """Test transfers where datastore records are missing.

        This is how execution butler works.
        """
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )

        # Configure the source butler to allow trust.
        self.source_butler.datastore.trustGetRequest = True

        self.assertButlerTransfers(purge=True)

    def testTransferMissingDisassembly(self):
        """Test transfers where datastore records are missing.

        This is how execution butler works.
        """
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )

        # Configure the source butler to allow trust.
        self.source_butler.datastore.trustGetRequest = True

        # Test disassembly.
        self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")

    def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
        """Test that a run can be transferred to another butler."""

        storageClass = self.storageClassFactory.getStorageClass(storageClassName)
        datasetTypeName = "random_data"

        # The test will create 3 collections and we will want to transfer
        # two of those three.
        runs = ["run1", "run2", "other"]

        # Also use two different dataset types to ensure that
        # grouping works.
        datasetTypeNames = ["random_data", "random_data_2"]

        # Create the run collections in the source butler.
        for run in runs:
            self.source_butler.registry.registerCollection(run, CollectionType.RUN)

        # Create dimensions in both butlers (transfer will not create them).
        n_exposures = 30
        for butler in (self.source_butler, self.target_butler):
            butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
            butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                    "name": "d-r",
                                                                    "band": "R"})
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                             "id": 1, "full_name": "det1"})

            dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
            for datasetTypeName in datasetTypeNames:
                datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
                butler.registry.registerDatasetType(datasetType)

            for i in range(n_exposures):
                butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                                 "id": i, "obs_id": f"exp{i}",
                                                                 "physical_filter": "d-r"})

        # Write a dataset to an unrelated run -- this will ensure that
        # we are rewriting integer dataset ids in the target if necessary.
        # Will not be relevant for UUID.
        run = "distraction"
        butler = Butler(butler=self.source_butler, run=run)
        butler.put(makeExampleMetrics(), datasetTypeName,
                   exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")

        # Write some example metrics to the source.
        butler = Butler(butler=self.source_butler)

        # Set of DatasetRefs that should be in the list of refs to transfer
        # but which will not be transferred.
        deleted = set()

        n_expected = 20  # Number of datasets expected to be transferred.
        source_refs = []
        for i in range(n_exposures):
            # Put a third of the datasets into each collection; only retain
            # two thirds of them.
            index = i % 3
            run = runs[index]
            datasetTypeName = datasetTypeNames[i % 2]

            metric_data = {"summary": {"counter": i},
                           "output": {"text": "metric"},
                           "data": [2*x for x in range(i)]}
            metric = MetricsExample(**metric_data)
            dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)

            # Remove the datastore record using the low-level API.
            if purge:
                # Remove records for a fraction of the datasets.
                if index == 1:

                    # For one of these, delete the file as well.
                    # This allows the "missing" code to filter the
                    # file out.
                    if not deleted:
                        primary, uris = butler.datastore.getURIs(ref)
                        if primary:
                            primary.remove()
                        for uri in uris.values():
                            uri.remove()
                        n_expected -= 1
                        deleted.add(ref)

                    # Remove the datastore record.
                    butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})

            if index < 2:
                source_refs.append(ref)
            if ref not in deleted:
                new_metric = butler.get(ref.unresolved(), collections=run)
                self.assertEqual(new_metric, metric)

        # Now transfer them to the second butler.
        with self.assertLogs(level=logging.DEBUG) as cm:
            transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
                                                           id_gen_map=id_gen_map)
        self.assertEqual(len(transferred), n_expected)
        log_output = ";".join(cm.output)
        self.assertIn("found in datastore for chunk", log_output)

        # Do the transfer twice to ensure that it will do nothing extra.
        # Only do this if purge=True because it does not work for int
        # dataset_id.
        if purge:
            transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
                                                           id_gen_map=id_gen_map)
            self.assertEqual(len(transferred), n_expected)

            # Also do an explicit low-level transfer to trigger some
            # edge cases.
            with self.assertLogs(level=logging.DEBUG) as cm:
                self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
            log_output = ";".join(cm.output)
            self.assertIn("no file artifacts exist", log_output)

        # A datastore, not a butler, is required as the transfer source.
        with self.assertRaises(TypeError):
            self.target_butler.datastore.transfer_from(self.source_butler, source_refs)

        with self.assertRaises(ValueError):
            self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs,
                                                       transfer="split")

        # Now try to get the same refs from the new butler.
        for ref in source_refs:
            if ref not in deleted:
                unresolved_ref = ref.unresolved()
                new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
                old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
                self.assertEqual(new_metric, old_metric)


if __name__ == "__main__":
    unittest.main()