Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

22"""Tests for Butler. 

23""" 

24 

25import os 

26import posixpath 

27import unittest 

28import tempfile 

29import shutil 

30import pickle 

31import string 

32import random 

33import time 

34import socket 

35 

# Optional test dependencies: S3 tests need boto3/botocore plus moto's
# mock_s3 decorator; WebDAV tests need cheroot and wsgidav.  Tests that
# require them are expected to skip when the sentinels below are None.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    # Sentinel checked by WebDAV-dependent tests.
    WsgiDAVApp = None

53 

# Third-party and project imports.
import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

# Directory containing this test file; used to locate test data and configs.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

77 

78 

def makeExampleMetrics():
    """Return a small `MetricsExample` populated with fixed test values."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)

85 

86 

class TransactionTestError(Exception):
    """Error used only by transaction tests.

    A dedicated exception type prevents misdiagnosing failures that might
    otherwise occur when a standard exception is used.
    """

92 

93 

class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            baseConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        # Supplying a search path should pull in the override directory.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        # The override must actually change the stored value for this key.
        key = ("datastore", "records", "table")
        self.assertNotEqual(baseConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")

112 

113 

114class ButlerPutGetTests: 

115 """Helper method for running a suite of put/get tests from different 

116 butler configurations.""" 

117 

118 root = None 

119 

120 @staticmethod 

121 def addDatasetType(datasetTypeName, dimensions, storageClass, registry): 

122 """Create a DatasetType and register it 

123 """ 

124 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

125 registry.registerDatasetType(datasetType) 

126 return datasetType 

127 

    @classmethod
    def setUpClass(cls):
        # Storage class definitions are shared by every test in the class,
        # so load them once from the test configuration file.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

132 

133 def assertGetComponents(self, butler, datasetRef, components, reference, collections=None): 

134 datasetType = datasetRef.datasetType 

135 dataId = datasetRef.dataId 

136 deferred = butler.getDirectDeferred(datasetRef) 

137 

138 for component in components: 

139 compTypeName = datasetType.componentTypeName(component) 

140 result = butler.get(compTypeName, dataId, collections=collections) 

141 self.assertEqual(result, getattr(reference, component)) 

142 result_deferred = deferred.get(component=component) 

143 self.assertEqual(result_deferred, result) 

144 

    def tearDown(self):
        # Remove the per-test butler root created in setUp (no-op if None).
        removeTestTempDir(self.root)

147 

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise the full put/get/prune life cycle for one dataset type.

        Parameters
        ----------
        storageClass : `StorageClass`
            Storage class to use for the test dataset type.
        datasetTypeName : `str`
            Name of the dataset type to create and register.

        Returns
        -------
        butler : `Butler`
            The butler used, with one dataset left in place for downstream
            tests.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path)
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # when path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

404 

    def testDeferredCollectionPassing(self):
        """Test a butler with no default run/collection: collections must be
        supplied explicitly at put/get time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

448 

449 

450class ButlerTests(ButlerPutGetTests): 

451 """Tests for Butler. 

452 """ 

453 useTempRoot = True 

454 

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        # Create a fresh repository from this test class's configuration.
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

460 

461 def testConstructor(self): 

462 """Independent test of constructor. 

463 """ 

464 butler = Butler(self.tmpConfigFile, run="ingest") 

465 self.assertIsInstance(butler, Butler) 

466 

467 collections = set(butler.registry.queryCollections()) 

468 self.assertEqual(collections, {"ingest"}) 

469 

470 butler2 = Butler(butler=butler, collections=["other"]) 

471 self.assertEqual( 

472 butler2.collections, 

473 CollectionSearch.fromExpression(["other"]) 

474 ) 

475 self.assertIsNone(butler2.run) 

476 self.assertIs(butler.datastore, butler2.datastore) 

477 

478 def testBasicPutGet(self): 

479 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

480 self.runPutGetTest(storageClass, "test_metric") 

481 

    def testCompositePutGetConcrete(self):
        """Round-trip a composite storage class configured to never be
        disassembled into per-component artifacts.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

503 

    def testCompositePutGetVirtual(self):
        """Round-trip a composite storage class that is disassembled into
        per-component artifacts (except for in-memory datastores, which
        never disassemble).
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no primary URI, one URI per component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

544 

    def testIngest(self):
        """Test ingest of externally-created files, both one ref per file
        and multiple refs sharing a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both refs share the single ingested file.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

637 

638 def testPruneCollections(self): 

639 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

640 butler = Butler(self.tmpConfigFile, writeable=True) 

641 # Load registry data with dimensions to hang datasets off of. 

642 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

643 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

644 # Add some RUN-type collections. 

645 run1 = "run1" 

646 butler.registry.registerRun(run1) 

647 run2 = "run2" 

648 butler.registry.registerRun(run2) 

649 # put some datasets. ref1 and ref2 have the same data ID, and are in 

650 # different runs. ref3 has a different data ID. 

651 metric = makeExampleMetrics() 

652 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

653 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass, 

654 butler.registry) 

655 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

656 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

657 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

658 

659 # Try to delete a RUN collection without purge, or with purge and not 

660 # unstore. 

661 with self.assertRaises(TypeError): 

662 butler.pruneCollection(run1) 

663 with self.assertRaises(TypeError): 

664 butler.pruneCollection(run2, purge=True) 

665 # Add a TAGGED collection and associate ref3 only into it. 

666 tag1 = "tag1" 

667 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

668 butler.registry.associate(tag1, [ref3]) 

669 # Add a CHAINED collection that searches run1 and then run2. It 

670 # logically contains only ref1, because ref2 is shadowed due to them 

671 # having the same data ID and dataset type. 

672 chain1 = "chain1" 

673 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

674 butler.registry.setCollectionChain(chain1, [run1, run2]) 

675 # Try to delete RUN collections, which should fail with complete 

676 # rollback because they're still referenced by the CHAINED 

677 # collection. 

678 with self.assertRaises(Exception): 

679 butler.pruneCollection(run1, pruge=True, unstore=True) 

680 with self.assertRaises(Exception): 

681 butler.pruneCollection(run2, pruge=True, unstore=True) 

682 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

683 [ref1, ref2, ref3]) 

684 self.assertTrue(butler.datastore.exists(ref1)) 

685 self.assertTrue(butler.datastore.exists(ref2)) 

686 self.assertTrue(butler.datastore.exists(ref3)) 

687 # Try to delete CHAINED and TAGGED collections with purge; should not 

688 # work. 

689 with self.assertRaises(TypeError): 

690 butler.pruneCollection(tag1, purge=True, unstore=True) 

691 with self.assertRaises(TypeError): 

692 butler.pruneCollection(chain1, purge=True, unstore=True) 

693 # Remove the tagged collection with unstore=False. This should not 

694 # affect the datasets. 

695 butler.pruneCollection(tag1) 

696 with self.assertRaises(MissingCollectionError): 

697 butler.registry.getCollectionType(tag1) 

698 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

699 [ref1, ref2, ref3]) 

700 self.assertTrue(butler.datastore.exists(ref1)) 

701 self.assertTrue(butler.datastore.exists(ref2)) 

702 self.assertTrue(butler.datastore.exists(ref3)) 

703 # Add the tagged collection back in, and remove it with unstore=True. 

704 # This should remove ref3 only from the datastore. 

705 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED) 

706 butler.registry.associate(tag1, [ref3]) 

707 butler.pruneCollection(tag1, unstore=True) 

708 with self.assertRaises(MissingCollectionError): 

709 butler.registry.getCollectionType(tag1) 

710 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

711 [ref1, ref2, ref3]) 

712 self.assertTrue(butler.datastore.exists(ref1)) 

713 self.assertTrue(butler.datastore.exists(ref2)) 

714 self.assertFalse(butler.datastore.exists(ref3)) 

715 # Delete the chain with unstore=False. The datasets should not be 

716 # affected at all. 

717 butler.pruneCollection(chain1) 

718 with self.assertRaises(MissingCollectionError): 

719 butler.registry.getCollectionType(chain1) 

720 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

721 [ref1, ref2, ref3]) 

722 self.assertTrue(butler.datastore.exists(ref1)) 

723 self.assertTrue(butler.datastore.exists(ref2)) 

724 self.assertFalse(butler.datastore.exists(ref3)) 

725 # Redefine and then delete the chain with unstore=True. Only ref1 

726 # should be unstored (ref3 has already been unstored, but otherwise 

727 # would be now). 

728 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED) 

729 butler.registry.setCollectionChain(chain1, [run1, run2]) 

730 butler.pruneCollection(chain1, unstore=True) 

731 with self.assertRaises(MissingCollectionError): 

732 butler.registry.getCollectionType(chain1) 

733 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

734 [ref1, ref2, ref3]) 

735 self.assertFalse(butler.datastore.exists(ref1)) 

736 self.assertTrue(butler.datastore.exists(ref2)) 

737 self.assertFalse(butler.datastore.exists(ref3)) 

738 # Remove run1. This removes ref1 and ref3 from the registry (they're 

739 # already gone from the datastore, which is fine). 

740 butler.pruneCollection(run1, purge=True, unstore=True) 

741 with self.assertRaises(MissingCollectionError): 

742 butler.registry.getCollectionType(run1) 

743 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

744 [ref2]) 

745 self.assertTrue(butler.datastore.exists(ref2)) 

746 # Remove run2. This removes ref2 from the registry and the datastore. 

747 butler.pruneCollection(run2, purge=True, unstore=True) 

748 with self.assertRaises(MissingCollectionError): 

749 butler.registry.getCollectionType(run2) 

750 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), 

751 []) 

752 

753 # Now that the collections have been pruned we can remove the 

754 # dataset type 

755 butler.registry.removeDatasetType(datasetType.name) 

756 

757 def testPickle(self): 

758 """Test pickle support. 

759 """ 

760 butler = Butler(self.tmpConfigFile, run="ingest") 

761 butlerOut = pickle.loads(pickle.dumps(butler)) 

762 self.assertIsInstance(butlerOut, Butler) 

763 self.assertEqual(butlerOut._config, butler._config) 

764 self.assertEqual(butlerOut.collections, butler.collections) 

765 self.assertEqual(butlerOut.run, butler.run) 

766 

767 def testGetDatasetTypes(self): 

768 butler = Butler(self.tmpConfigFile, run="ingest") 

769 dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"]) 

770 dimensionEntries = [ 

771 ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"}, 

772 {"instrument": "DummyCamComp"}), 

773 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

774 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}) 

775 ] 

776 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

777 # Add needed Dimensions 

778 for args in dimensionEntries: 

779 butler.registry.insertDimensionData(*args) 

780 

781 # When a DatasetType is added to the registry entries are not created 

782 # for components but querying them can return the components. 

783 datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"} 

784 components = set() 

785 for datasetTypeName in datasetTypeNames: 

786 # Create and register a DatasetType 

787 self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

788 

789 for componentName in storageClass.components: 

790 components.add(DatasetType.nameWithComponent(datasetTypeName, componentName)) 

791 

792 fromRegistry = set(butler.registry.queryDatasetTypes(components=True)) 

793 self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components) 

794 

795 # Now that we have some dataset types registered, validate them 

796 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

797 "datasetType.component", "random_data", "random_data_2"]) 

798 

799 # Add a new datasetType that will fail template validation 

800 self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry) 

801 if self.validationCanFail: 

802 with self.assertRaises(ValidationError): 

803 butler.validateConfiguration() 

804 

805 # Rerun validation but with a subset of dataset type names 

806 butler.validateConfiguration(datasetTypeNames=["metric4"]) 

807 

808 # Rerun validation but ignore the bad datasetType 

809 butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC", 

810 "datasetType.component", "random_data", "random_data_2"]) 

811 

812 def testTransaction(self): 

813 butler = Butler(self.tmpConfigFile, run="ingest") 

814 datasetTypeName = "test_metric" 

815 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

816 dimensionEntries = (("instrument", {"instrument": "DummyCam"}), 

817 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", 

818 "band": "R"}), 

819 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", 

820 "physical_filter": "d-r"})) 

821 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

822 metric = makeExampleMetrics() 

823 dataId = {"instrument": "DummyCam", "visit": 42} 

824 # Create and register a DatasetType 

825 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

826 with self.assertRaises(TransactionTestError): 

827 with butler.transaction(): 

828 # Add needed Dimensions 

829 for args in dimensionEntries: 

830 butler.registry.insertDimensionData(*args) 

831 # Store a dataset 

832 ref = butler.put(metric, datasetTypeName, dataId) 

833 self.assertIsInstance(ref, DatasetRef) 

834 # Test getDirect 

835 metricOut = butler.getDirect(ref) 

836 self.assertEqual(metric, metricOut) 

837 # Test get 

838 metricOut = butler.get(datasetTypeName, dataId) 

839 self.assertEqual(metric, metricOut) 

840 # Check we can get components 

841 self.assertGetComponents(butler, ref, 

842 ("summary", "data", "output"), metric) 

843 raise TransactionTestError("This should roll back the entire transaction") 

844 with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"): 

845 butler.registry.expandDataId(dataId) 

846 # Should raise LookupError for missing data ID value 

847 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

848 butler.get(datasetTypeName, dataId) 

849 # Also check explicitly if Dataset entry is missing 

850 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

851 # Direct retrieval should not find the file in the Datastore 

852 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

853 butler.getDirect(ref) 

854 

855 def testMakeRepo(self): 

856 """Test that we can write butler configuration to a new repository via 

857 the Butler.makeRepo interface and then instantiate a butler from the 

858 repo root. 

859 """ 

860 # Do not run the test if we know this datastore configuration does 

861 # not support a file system root 

862 if self.fullConfigKey is None: 

863 return 

864 

865 # create two separate directories 

866 root1 = tempfile.mkdtemp(dir=self.root) 

867 root2 = tempfile.mkdtemp(dir=self.root) 

868 

869 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

870 limited = Config(self.configFile) 

871 butler1 = Butler(butlerConfig) 

872 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

873 full = Config(self.tmpConfigFile) 

874 butler2 = Butler(butlerConfig) 

875 # Butlers should have the same configuration regardless of whether 

876 # defaults were expanded. 

877 self.assertEqual(butler1._config, butler2._config) 

878 # Config files loaded directly should not be the same. 

879 self.assertNotEqual(limited, full) 

880 # Make sure "limited" doesn't have a few keys we know it should be 

881 # inheriting from defaults. 

882 self.assertIn(self.fullConfigKey, full) 

883 self.assertNotIn(self.fullConfigKey, limited) 

884 

885 # Collections don't appear until something is put in them 

886 collections1 = set(butler1.registry.queryCollections()) 

887 self.assertEqual(collections1, set()) 

888 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

889 

890 # Check that a config with no associated file name will not 

891 # work properly with relocatable Butler repo 

892 butlerConfig.configFile = None 

893 with self.assertRaises(ValueError): 

894 Butler(butlerConfig) 

895 

896 with self.assertRaises(FileExistsError): 

897 Butler.makeRepo(self.root, standalone=True, 

898 config=Config(self.configFile), overwrite=False) 

899 

900 def testStringification(self): 

901 butler = Butler(self.tmpConfigFile, run="ingest") 

902 butlerStr = str(butler) 

903 

904 if self.datastoreStr is not None: 

905 for testStr in self.datastoreStr: 

906 self.assertIn(testStr, butlerStr) 

907 if self.registryStr is not None: 

908 self.assertIn(self.registryStr, butlerStr) 

909 

910 datastoreName = butler.datastore.name 

911 if self.datastoreName is not None: 

912 for testStr in self.datastoreName: 

913 self.assertIn(testStr, datastoreName) 

914 

915 def testButlerRewriteDataId(self): 

916 """Test that dataIds can be rewritten based on dimension records.""" 

917 

918 butler = Butler(self.tmpConfigFile, run="ingest") 

919 

920 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

921 datasetTypeName = "random_data" 

922 

923 # Create dimension records. 

924 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

925 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

926 "name": "d-r", 

927 "band": "R"}) 

928 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", 

929 "id": 1, "full_name": "det1"}) 

930 

931 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

932 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

933 butler.registry.registerDatasetType(datasetType) 

934 

935 n_exposures = 5 

936 dayobs = 20210530 

937 

938 for i in range(n_exposures): 

939 butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp", 

940 "id": i, "obs_id": f"exp{i}", 

941 "seq_num": i, "day_obs": dayobs, 

942 "physical_filter": "d-r"}) 

943 

944 # Write some data. 

945 for i in range(n_exposures): 

946 metric = {"something": i, 

947 "other": "metric", 

948 "list": [2*x for x in range(i)]} 

949 

950 # Use the seq_num for the put to test rewriting. 

951 dataId = {"seq_num": i, "day_obs": dayobs, "detector": 1, "instrument": "DummyCamComp", 

952 "physical_filter": "d-r"} 

953 ref = butler.put(metric, datasetTypeName, dataId=dataId) 

954 

955 # Check that the exposure is correct in the dataId 

956 self.assertEqual(ref.dataId["exposure"], i) 

957 

958 # and check that we can get the dataset back with the same dataId 

959 new_metric = butler.get(datasetTypeName, dataId=dataId) 

960 self.assertEqual(new_metric, metric) 

961 

962 

class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Return True if a file exists at ``relpath`` relative to ``root``.

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location.
        """
        return ButlerURI(root, forceDirectory=True).join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Dimension records required by the data IDs used below.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Something to store.
        metric = makeExampleMetrics()

        # Three almost-identical DatasetTypes (the first two will use the
        # default template).
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        for typeName in ("metric1", "metric2", "metric3"):
            butler.registry.registerDatasetType(DatasetType(typeName, dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed.
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions.
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions.
        butler.datastore.templates.validateTemplates([ref])

        # A file template that would not produce unique filenames must be
        # rejected.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export a populated repo into a temp directory and import it back
        into a fresh temp-directory repo. Does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # The repo must actually contain at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Exporting the same datasets again should quietly do
                # nothing because of internal deduplication, and it
                # shouldn't complain about being asked to export the "htm7"
                # elements even though there aren't any in these datasets
                # or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Saving one of the data IDs again is harmless for the same
                # reason.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of
                # the butler command line interface "import" subcommand.
                # Functions in the script folder are generally considered
                # protected and should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType
                        # and data ID separately, to avoid lookup by
                        # dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add two RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put one dataset in each run.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())

1123 

1124 

class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # The repo must actually contain at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        datastoreRoot = exportButler.datastore.root
        # Datastore-relative paths of every stored dataset.
        pathsInStore = [exportButler.getURI(d).relative_to(datastoreRoot) for d in datasets]

        for path in pathsInStore:
            # Assume local file system.
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        # Each transfer mode must reproduce the same relative layout in the
        # export directory.
        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

1160 

class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # Ingest does not apply to an in-memory datastore; disable the
        # inherited test by overriding it with a no-op.
        pass

1173 

1174 

class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # The second child datastore in the chain supplies the formatters key.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    # Expected substrings for each datastore in the chain.
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"

1184 

1185 

class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root".
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        # Config lives in dir2; the repo contents stay in dir1.
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))

1217 

1218 

class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        # The repo goes in root, but its config is written to root2.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

1247 

1248 

class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        # Pass a directory as outfile; makeRepo writes butler.yaml inside it.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()

1267 

1268 

class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        # Express the outfile location as a URI rather than a plain path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

1281 

1282 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Bug fix: this was a plain string literal, so {bucketName} and {root}
    # were never interpolated; it must be an f-string like datastoreStr.
    datastoreName = [f"FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        # Robustness fix: use getattr so that a setUp failure before
        # reg_dir was assigned does not raise AttributeError here and mask
        # the original error.
        reg_dir = getattr(self, "reg_dir", None)
        if reg_dir is not None and os.path.exists(reg_dir):
            shutil.rmtree(reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

1378 

1379 

@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Bug fix: this was a plain string literal, so {serverName} and {root}
    # were never interpolated; it must be an f-string like datastoreStr.
    datastoreName = [f"FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run"""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True
    """

1428 def genRoot(self): 

1429 """Returns a random string of len 20 to serve as a root 

1430 name for the temporary bucket repo. 

1431 

1432 This is equivalent to tempfile.mkdtemp as this is what self.root 

1433 becomes when useTempRoot is True. 

1434 """ 

1435 rndstr = "".join( 

1436 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1437 ) 

1438 return rndstr + "/" 

1439 

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class: load the shared storage classes.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        # Replace the class-level default port with one known to be free now.
        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.
        # _serveWebdav is accessed through the class, so cls is passed
        # explicitly as its "self"; the lambda lets the server thread observe
        # later changes to cls.stopWebdavServer (set in tearDownClass).
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start -- fixed delay; there is no readiness
        # handshake with the server thread.
        time.sleep(3)

1454 

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server: the serving loop
        # in _serveWebdav polls this flag once per second.
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

1461 

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        """Create a fresh Butler repository on the local webdav server."""
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # need local folder to store registry database
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # Expected values used by the shared butler test assertions;
        # overrides the class-level placeholders.
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        # Fail fast if the server thread started in setUpClass is not up.
        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

1487 

1488 # Mock required environment variables during tests 

1489 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1490 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1491 TESTDIR, "config/testConfigs/webdav/token"), 

1492 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1493 def tearDown(self): 

1494 # Clear temporary directory 

1495 ButlerURI(self.rooturi).remove() 

1496 ButlerURI(self.rooturi).session.close() 

1497 

1498 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1499 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1500 

1501 if self.useTempRoot and os.path.exists(self.root): 

1502 shutil.rmtree(self.root, ignore_errors=True) 

1503 

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Starts a local webdav-compatible HTTP server,
        Listening on http://localhost:port
        This server only runs when this test class is instantiated,
        and then shuts down. Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen
        stopWebdavServer : callable
            Zero-argument callable polled once per second; serving stops
            as soon as it returns `True`.
        """
        # Serve the system temporary directory over webdav.
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            # Accept any user, no credentials required.
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

1551 

1552 def _getfreeport(): 

1553 """ 

1554 Determines a free port using sockets. 

1555 """ 

1556 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1557 free_socket.bind(('0.0.0.0', 0)) 

1558 free_socket.listen() 

1559 port = free_socket.getsockname()[1] 

1560 free_socket.close() 

1561 return port 

1562 

1563 

class PosixDatastoreTransfers(unittest.TestCase):
    """Test data transfers between butlers.

    Test for different managers. UUID to UUID and integer to integer are
    tested. UUID to integer is not supported since we do not currently
    want to allow that. Integer to UUID is supported with the caveat
    that UUID4 will be generated and this will be incorrect for raw
    dataset types. The test ignores that.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by every test in this case.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def setUp(self):
        # Fresh scratch directory and configuration per test.
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config(self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, manager, label):
        """Create a writeable butler whose registry uses the given dataset
        record storage manager.

        Parameters
        ----------
        manager : `str`
            Fully-qualified name of the dataset record storage manager class.
        label : `str`
            Suffix keeping the per-butler repository directories apart.
        """
        config = Config(self.configFile)
        config["registry", "managers", "datasets"] = manager
        return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
                      writeable=True)

    def create_butlers(self, manager1, manager2):
        # Transfers always go from source_butler to target_butler.
        self.source_butler = self.create_butler(manager1, "1")
        self.target_butler = self.create_butler(manager2, "2")

    def testTransferUuidToUuid(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )
        # Setting id_gen_map should have no effect here
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def testTransferIntToInt(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            )
        # int dataset ID only allows UNIQUE
        self.assertButlerTransfers()

    def testTransferIntToUuid(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def assertButlerTransfers(self, id_gen_map=None):
        """Test that a run can be transferred to another butler.

        Parameters
        ----------
        id_gen_map : `dict`, optional
            Mapping of dataset type name to dataset ID generation mode,
            forwarded to ``Butler.transfer_from``.
        """

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        # Reassigned in the put loop below; this value is used only for the
        # "distraction" dataset written before that loop.
        datasetTypeName = "random_data"

        # Test will create 3 collections and we will want to transfer
        # two of those three.
        runs = ["run1", "run2", "other"]

        # Also want to use two different dataset types to ensure that
        # grouping works.
        datasetTypeNames = ["random_data", "random_data_2"]

        # Create the run collections in the source butler.
        for run in runs:
            self.source_butler.registry.registerCollection(run, CollectionType.RUN)

        # Create dimensions in both butlers (transfer will not create them).
        n_exposures = 30
        for butler in (self.source_butler, self.target_butler):
            butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
            butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                    "name": "d-r",
                                                                    "band": "R"})
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                             "id": 1, "full_name": "det1"})

            # Register both dataset types against the same dimension set.
            dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
            for datasetTypeName in datasetTypeNames:
                datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
                butler.registry.registerDatasetType(datasetType)

            for i in range(n_exposures):
                butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                                 "id": i, "obs_id": f"exp{i}",
                                                                 "physical_filter": "d-r"})

        # Write a dataset to an unrelated run -- this will ensure that
        # we are rewriting integer dataset ids in the target if necessary.
        # Will not be relevant for UUID.
        run = "distraction"
        butler = Butler(butler=self.source_butler, run=run)
        butler.put({"unrelated": 5, "dataset": "test"}, datasetTypeName,
                   exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")

        # Write some example metrics to the source
        butler = Butler(butler=self.source_butler)

        source_refs = []
        for i in range(n_exposures):
            # Put a third of datasets into each collection, only retain
            # two thirds.
            index = i % 3
            run = runs[index]
            datasetTypeName = datasetTypeNames[i % 2]

            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}
            dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
            if index < 2:
                source_refs.append(ref)
            # Round-trip through the source butler to confirm the put.
            new_metric = butler.get(ref.unresolved(), collections=run)
            self.assertEqual(new_metric, metric)

        # Now transfer them to the second butler
        transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
                                                       id_gen_map=id_gen_map)
        # Two thirds of the 30 puts were retained above.
        self.assertEqual(len(transferred), 20)

        # Now try to get the same refs from the new butler.
        for ref in source_refs:
            unresolved_ref = ref.unresolved()
            new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
            old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
            self.assertEqual(new_metric, old_metric)

1702 

1703 

# Run the full test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()