# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import unittest.mock
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that are not covered by the
    other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
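        """Check that each named component of ``reference`` can be retrieved,
        both with a direct component get and through a deferred handle, and
        that the two results agree.
        """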
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
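        """Exercise put/get round trips for the given storage class and
        dataset type name using several argument signatures, including
        component retrieval, artifact retrieval, and dataset pruning.

        Returns the populated butler so callers can make further checks.
        """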
        # New datasets will be added to the run collection, which is also
        # the collection searched when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists.  Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ButlerURI(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    transferred = butler.retrieveArtifacts([ref], destination,
                                                           preserve_path=preserve_path)
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ButlerURI.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                     f" {artifacts} vs {primary_uri} and {secondary_uris}")

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                     preserve_path=preserve_path,
                                                                     overwrite=True)
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run is now
            # empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
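        """Test that a butler with no default run or collection can still
        put and get datasets when collections are supplied explicitly, and
        that lookups without any collection raise TypeError.
        """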
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
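        """Test ingesting externally created files, first with one dataset
        per file and then with multiple datasets sharing a single file.
        """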
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
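        """Test pruning of RUN, TAGGED, and CHAINED collections, checking
        that registry entries and datastore artifacts are only removed
        when the combination of arguments allows it.
        """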
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
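        """Test dataset type registration and retrieval, including component
        dataset types, followed by configuration validation.
        """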
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying the registry can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
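        """Test that raising part-way through a transaction rolls back the
        dimension records, registry entries, and datastore contents.
        """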
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root.
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it would
        # otherwise be inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them.
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
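        """Test that file templates from the configuration control where
        datasets are written, and that non-unique templates are rejected.
        """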
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes (all will use the
        # default template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new temp
        directory repo.  Does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
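        """Test removing entire runs, with and without unstoring their
        datasets.
        """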
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using several local transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume a local file system.
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # In-memory datastores cannot ingest files, so skip the inherited
        # ingest test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config directory passed to makeRepo as outfile works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know
        # the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file URI passed to makeRepo as outfile works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot is
    False. Otherwise the root is set to a randomly generated 20-character
    string during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""
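
    # The class-level values above are placeholders: setUp recomputes
    # bucketName, root, datastoreStr and datastoreName from the test config
    # and the generated temporary root.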

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary bucket repo.

        This plays the same role as tempfile.mkdtemp: it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"
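
    # The trailing "/" keeps the generated root usable as a key prefix when
    # joined into the s3://bucket/root URL constructed in setUp.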

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # Moto needs to know that we expect the bucket to exist (this used
        # to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

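    # Note: with the class-level @mock_s3 decorator active, the boto3 and
    # botocore calls in setUp and tearDown are intercepted by moto and served
    # from memory; no real AWS endpoint is contacted, and the dummy
    # credentials exist only to satisfy botocore's credential lookup.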

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot is
    False. Otherwise the root is set to a randomly generated 20-character
    string during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to gracefully shut down when
    set to True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary repo.

        This plays the same role as tempfile.mkdtemp: it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the base class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start.
        time.sleep(3)
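
    # Shutdown handshake: tearDownClass flips stopWebdavServer, and the
    # lambda passed to _serveWebdav re-reads that flag on every poll, so the
    # server thread notices the change and exits gracefully.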

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()
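
    # unittest.mock patchers applied as a class decorator wrap only methods
    # whose names start with "test", so setUp and tearDown below carry their
    # own copies of the environment patch.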

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # need local folder to store registry database
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port. The server runs for as long as this test class is in
        use and is then shut down. Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : callable
            Callable that returns `True` when the server should shut down.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            # Serve the local temporary directory at the URL root.
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            # Allow anonymous access.
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully shut down the server
            # when it returns True.
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()
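
    # cheroot lifecycle as used above: prepare() binds the listening socket,
    # serve() runs the accept loop (here in its own thread) and stop() shuts
    # the server down once the polling loop ends.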

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets."""
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
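        # Binding to port 0 makes the OS assign an unused ephemeral port.
        # There is a small race window between closing this probe socket and
        # the webdav server rebinding the port, which these tests tolerate.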

        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


class PosixDatastoreTransfers(unittest.TestCase):
    """Test data transfers between butlers.

    Transfers are tested for different dataset-ID managers: UUID to UUID
    and integer to integer. UUID to integer is not supported since we do
    not currently want to allow that. Integer to UUID is supported with the
    caveat that a UUID4 will be generated, which would be incorrect for raw
    dataset types; the test ignores that.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
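
    # The call shape exercised throughout, sketched with the names defined
    # in these tests:
    #
    #     transferred = self.target_butler.transfer_from(
    #         self.source_butler, source_refs,
    #         id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})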

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config(self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, manager, label):
        config = Config(self.configFile)
        config["registry", "managers", "datasets"] = manager
        return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
                      writeable=True)

    def create_butlers(self, manager1, manager2):
        self.source_butler = self.create_butler(manager1, "1")
        self.target_butler = self.create_butler(manager2, "2")

    def testTransferUuidToUuid(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )
        # Setting id_gen_map should have no effect here.
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def testTransferIntToInt(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            )
        # Integer dataset IDs only allow UNIQUE.
        self.assertButlerTransfers()

    def testTransferIntToUuid(self):
        self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManager",
                            "lsst.daf.butler.registry.datasets.byDimensions."
                            "ByDimensionsDatasetRecordStorageManagerUUID",
                            )
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
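        # (id_gen_map above maps a dataset type name to DATAID_TYPE, which
        # is understood to derive deterministic dataset IDs from dataset
        # type and data ID rather than minting random UUID4s, making the
        # int-to-UUID transfer reproducible for that type.)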

    def assertButlerTransfers(self, id_gen_map=None):
        """Test that a run can be transferred to another butler."""

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # The test creates three collections and will transfer two of them.
        runs = ["run1", "run2", "other"]

        # Also use two different dataset types to ensure that grouping
        # works.
        datasetTypeNames = ["random_data", "random_data_2"]

        # Create the run collections in the source butler.
        for run in runs:
            self.source_butler.registry.registerCollection(run, CollectionType.RUN)

        # Create dimensions in both butlers (transfer will not create them).
        n_exposures = 30
        for butler in (self.source_butler, self.target_butler):
            butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
            butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                    "name": "d-r",
                                                                    "band": "R"})
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                             "id": 1, "full_name": "det1"})

            dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
            for datasetTypeName in datasetTypeNames:
                datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
                butler.registry.registerDatasetType(datasetType)

            for i in range(n_exposures):
                butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                                 "id": i, "obs_id": f"exp{i}",
                                                                 "physical_filter": "d-r"})

        # Write a dataset to an unrelated run -- this will ensure that
        # we are rewriting integer dataset ids in the target if necessary.
        # Will not be relevant for UUID.
        run = "distraction"
        butler = Butler(butler=self.source_butler, run=run)
        butler.put({"unrelated": 5, "dataset": "test"}, datasetTypeName,
                   exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")

        # Write some example metrics to the source butler.
        butler = Butler(butler=self.source_butler)

        source_refs = []
        for i in range(n_exposures):
            # Put a third of the datasets into each collection, and only
            # retain refs for two thirds of them.
            index = i % 3
            run = runs[index]
            datasetTypeName = datasetTypeNames[i % 2]

            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}
            dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
            if index < 2:
                source_refs.append(ref)
            new_metric = butler.get(ref.unresolved(), collections=run)
            self.assertEqual(new_metric, metric)

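        # Of the 30 datasets written, those with i % 3 == 2 landed in
        # "other" and were not retained, so two thirds (20 refs) are
        # expected to transfer.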

        # Now transfer them to the second butler.
        transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
                                                       id_gen_map=id_gen_map)
        self.assertEqual(len(transferred), 20)

        # Now try to get the same refs from the new butler.
        for ref in source_refs:
            unresolved_ref = ref.unresolved()
            new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
            old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
            self.assertEqual(new_metric, old_metric)


if __name__ == "__main__":
    unittest.main()