Coverage for tests/test_butler.py: 16%


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock

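# Optional dependencies used by the S3 datastore tests: if any of these
# imports fail, boto3 is set to None so dependent tests can detect the
# missing packages, and mock_s3 degrades to the no-op decorator below.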

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls

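# Optional WebDAV server dependencies; WsgiDAVApp is set to None when they
# cannot be imported so tests that need a WebDAV endpoint can detect this.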

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

from tempfile import gettempdir
from threading import Thread

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import isWebdavEndpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
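    """Return a small MetricsExample holding the summary, output, and data
    values stored by most of the tests below.
    """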

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases."""

    def testSearchPath(self):
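        """Check that a config search path can override entries in the
        default butler configuration.
        """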

        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to run against
    different butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
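        """Check that every named component of ``reference`` can be read back
        both directly and through a deferred dataset handle.
        """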

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
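        """Create a butler for the test repo, register ``datasetTypeName``,
        and insert the instrument, filter, and visit dimension records used
        throughout these tests.
        """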

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to a run collection, and the lookups
        # below always name the collection explicitly.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the collections
            # are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in a run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

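        # A Butler constructed from an existing butler shares its state; the
        # assertIs check below confirms that the datastore instance is reused.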

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
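        # With standalone=True the new repo is written with an expanded config
        # that does not inherit package defaults; the fullConfigKey assertions
        # below rely on this.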

        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
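        """Check that str(butler) mentions the configured datastore and
        registry.
        """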

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Run an export to a temp directory and an import back into a new
        temp directory repo. It does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

1300 def testExportTransferCopy(self): 

1301 """Test local export using all transfer modes""" 

1302 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1303 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1304 # Test that the repo actually has at least one dataset. 

1305 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1306 self.assertGreater(len(datasets), 0) 

1307 uris = [exportButler.getURI(d) for d in datasets] 

1308 datastoreRoot = exportButler.datastore.root 

1309 

1310 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1311 

1312 for path in pathsInStore: 

1313 # Assume local file system 

1314 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1315 

1316 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1317 with safeTestTempDir(TESTDIR) as exportDir: 

1318 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1319 export.saveDatasets(datasets) 

1320 for path in pathsInStore: 

1321 self.assertTrue( 

1322 self.checkFileExists(exportDir, path), 

1323 f"Check that mode {transfer} exported files", 

1324 ) 
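# Added note: "move" is deliberately absent from the transfer modes tested
# above -- presumably because it would relocate the datastore's own
# artifacts and break the re-export performed on each loop iteration.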

1325 

1326 def testPruneDatasets(self): 

1327 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1328 butler = Butler(self.tmpConfigFile, writeable=True) 

1329 # Load registry data with dimensions to hang datasets off of. 

1330 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1331 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1332 # Add some RUN-type collections. 

1333 run1 = "run1" 

1334 butler.registry.registerRun(run1) 

1335 run2 = "run2" 

1336 butler.registry.registerRun(run2) 

1337 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1338 # different runs. ref3 has a different data ID. 

1339 metric = makeExampleMetrics() 

1340 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1341 datasetType = self.addDatasetType( 

1342 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1343 ) 

1344 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1345 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1346 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1347 

1348 # Simple prune. 

1349 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1350 with self.assertRaises(LookupError): 

1351 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1352 

1353 # Put data back. 

1354 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1355 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1356 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1357 

1358 # Check that in normal mode, deleting the record means that the 
1359 # trash mechanism will not touch the file. 

1360 uri1 = butler.datastore.getURI(ref1) 

1361 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1362 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1363 butler.datastore.trash(ref1) 

1364 butler.datastore.emptyTrash() 

1365 self.assertTrue(uri1.exists()) 

1366 uri1.remove() # Clean it up. 

1367 

1368 # Simulate execution butler setup by deleting the datastore 

1369 # record but keeping the file around and trusting. 

1370 butler.datastore.trustGetRequest = True 

1371 uri2 = butler.datastore.getURI(ref2) 

1372 uri3 = butler.datastore.getURI(ref3) 

1373 self.assertTrue(uri2.exists()) 

1374 self.assertTrue(uri3.exists()) 

1375 

1376 # Remove the datastore record. 

1377 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1378 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1379 self.assertTrue(uri2.exists()) 

1380 butler.datastore.trash([ref2, ref3]) 

1381 # ref2's record is gone, so its file is removed immediately. 

1382 self.assertFalse(uri2.exists()) 

1383 # But ref3 has to wait for the empty. 

1384 self.assertTrue(uri3.exists()) 

1385 butler.datastore.emptyTrash() 

1386 self.assertFalse(uri3.exists()) 

1387 

1388 # Clear out the datasets from registry. 

1389 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 
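# Added recap (a sketch of the behavior demonstrated above, not an
# authoritative statement of the datastore API): file deletion is
# two-phase. trash() marks refs and emptyTrash() removes the artifacts --
# except that in trust mode a ref whose datastore record is already gone
# has its file removed immediately by trash() itself:
#
#     butler.datastore.trash([ref2, ref3])  # ref2 (no record): file gone now
#     butler.datastore.emptyTrash()         # ref3: file gone only after this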

1390 

1391 def testPytypePutCoercion(self): 

1392 """Test python type coercion on Butler.get and put.""" 

1393 

1394 # Store some data with the normal example storage class. 

1395 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1396 datasetTypeName = "test_metric" 

1397 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1398 

1399 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1400 

1401 # Put a dict and this should coerce to a MetricsExample 

1402 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1403 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1404 test_metric = butler.getDirect(metric_ref) 

1405 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1406 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1407 self.assertEqual(test_metric.output, test_dict["output"]) 

1408 

1409 # Check that the put still works if a DatasetType is given with 

1410 # a definition matching this python type. 

1411 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1412 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1413 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1414 self.assertEqual(metric2_ref.datasetType, registry_type) 

1415 

1416 # The get will return the type expected by registry. 

1417 test_metric2 = butler.getDirect(metric2_ref) 

1418 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1419 

1420 # Make a new DatasetRef with the compatible but different DatasetType. 

1421 # This should now return a dict. 

1422 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1423 test_dict2 = butler.getDirect(new_ref) 

1424 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1425 

1426 # Get it again with the wrong dataset type definition using get() 

1427 # rather than getDirect(). This should be consistent with getDirect() 

1428 # behavior and return the type of the DatasetType. 

1429 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1430 self.assertEqual(get_full_type_name(test_dict3), "dict") 
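# Added summary of the coercion rule exercised above: the python type
# returned by get()/getDirect() follows the storage class of the
# DatasetType used for the call, not the type of the object originally
# passed to put(). Schematically (a sketch of the calls made above):
#
#     ref = butler.put(test_dict, "test_metric", dataId=dataId)  # dict in
#     butler.getDirect(ref)       # -> MetricsExample (registry definition)
#     butler.getDirect(new_ref)   # -> dict (dict-typed DatasetRef)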

1431 

1432 def testPytypeCoercion(self): 

1433 """Test python type coercion on Butler.get and put.""" 

1434 

1435 # Store some data with the normal example storage class. 

1436 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1437 datasetTypeName = "test_metric" 

1438 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1439 

1440 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1441 metric = butler.get(datasetTypeName, dataId=dataId) 

1442 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1443 

1444 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1445 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1446 

1447 # Now need to hack the registry dataset type definition. 

1448 # There is no API for this. 

1449 manager = butler.registry._managers.datasets 

1450 manager._db.update( 

1451 manager._static.dataset_type, 

1452 {"name": datasetTypeName}, 

1453 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1454 ) 

1455 

1456 # Force reset of dataset type cache 

1457 butler.registry.refresh() 

1458 

1459 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1460 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1461 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1462 

1463 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1464 self.assertNotEqual(type(metric_model), type(metric)) 

1465 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1466 

1467 # Put the model and read it back to show that everything now 

1468 # works as normal. 

1469 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1470 metric_model_new = butler.get(metric_ref) 

1471 self.assertEqual(metric_model_new, metric_model) 

1472 

1473 # Hack the storage class again to something for which the get will 
1474 # fail because no conversion class exists. 

1475 manager._db.update( 

1476 manager._static.dataset_type, 

1477 {"name": datasetTypeName}, 

1478 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1479 ) 

1480 butler.registry.refresh() 

1481 

1482 with self.assertRaises(ValueError): 

1483 butler.get(datasetTypeName, dataId=dataId) 

1484 

1485 

1486class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1487 """InMemoryDatastore specialization of a butler""" 

1488 

1489 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1490 fullConfigKey = None 

1491 useTempRoot = False 

1492 validationCanFail = False 

1493 datastoreStr = ["datastore='InMemory"] 

1494 datastoreName = ["InMemoryDatastore@"] 

1495 registryStr = "/gen3.sqlite3" 

1496 

1497 def testIngest(self): 
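# Ingest requires file artifacts, which the in-memory datastore does not
# have, so the inherited ingest test is intentionally a no-op here.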

1498 pass 

1499 

1500 

1501class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1502 """PosixDatastore specialization""" 

1503 

1504 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1505 fullConfigKey = ".datastore.datastores.1.formatters" 

1506 validationCanFail = True 

1507 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1508 datastoreName = [ 

1509 "InMemoryDatastore@", 

1510 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1511 "SecondDatastore", 

1512 ] 

1513 registryStr = "/gen3.sqlite3" 
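# Added sketch: the shape of a chained-datastore config implied by the
# fullConfigKey above (".datastore.datastores.1.formatters"). The exact
# contents of config/basic/butler-chained.yaml are an assumption here:
#
#     datastore:
#       cls: lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore
#       datastores:
#         - cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
#         - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
#         - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore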

1514 

1515 

1516class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1517 """Test that a yaml file in one location can refer to a root in another.""" 

1518 

1519 datastoreStr = ["dir1"] 

1520 # Disable the makeRepo test since we are deliberately not using 

1521 # butler.yaml as the config name. 

1522 fullConfigKey = None 

1523 

1524 def setUp(self): 

1525 self.root = makeTestTempDir(TESTDIR) 

1526 

1527 # Make a new repository in one place 

1528 self.dir1 = os.path.join(self.root, "dir1") 

1529 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1530 

1531 # Move the yaml file to a different place and add a "root" 

1532 self.dir2 = os.path.join(self.root, "dir2") 

1533 os.makedirs(self.dir2, exist_ok=True) 

1534 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1535 config = Config(configFile1) 

1536 config["root"] = self.dir1 

1537 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1538 config.dumpToUri(configFile2) 

1539 os.remove(configFile1) 

1540 self.tmpConfigFile = configFile2 

1541 

1542 def testFileLocations(self): 

1543 self.assertNotEqual(self.dir1, self.dir2) 

1544 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1545 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1546 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1547 

1548 

1549class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1550 """Test that a config file created by makeRepo outside of repo works.""" 

1551 

1552 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1553 

1554 def setUp(self): 

1555 self.root = makeTestTempDir(TESTDIR) 

1556 self.root2 = makeTestTempDir(TESTDIR) 

1557 

1558 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1559 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1560 

1561 def tearDown(self): 

1562 if os.path.exists(self.root2): 

1563 shutil.rmtree(self.root2, ignore_errors=True) 

1564 super().tearDown() 

1565 

1566 def testConfigExistence(self): 

1567 c = Config(self.tmpConfigFile) 

1568 uri_config = ResourcePath(c["root"]) 

1569 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1570 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1571 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1572 

1573 def testPutGet(self): 

1574 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1575 self.runPutGetTest(storageClass, "test_metric") 

1576 

1577 

1578class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1579 """Test that a config file created by makeRepo outside of repo works.""" 

1580 

1581 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1582 

1583 def setUp(self): 

1584 self.root = makeTestTempDir(TESTDIR) 

1585 self.root2 = makeTestTempDir(TESTDIR) 

1586 

1587 self.tmpConfigFile = self.root2 

1588 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1589 

1590 def testConfigExistence(self): 

1591 # Append the yaml file else Config constructor does not know the file 

1592 # type. 

1593 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1594 super().testConfigExistence() 

1595 

1596 

1597class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1598 """Test that a config file created by makeRepo outside of repo works.""" 

1599 

1600 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1601 

1602 def setUp(self): 

1603 self.root = makeTestTempDir(TESTDIR) 

1604 self.root2 = makeTestTempDir(TESTDIR) 

1605 

1606 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1607 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1608 

1609 

1610@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1611class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1612 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1613 a local in-memory SqlRegistry. 

1614 """ 

1615 

1616 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1617 fullConfigKey = None 

1618 validationCanFail = True 

1619 

1620 bucketName = "anybucketname" 

1621 """Name of the Bucket that will be used in the tests. The name is read from 

1622 the config file used with the tests during set-up. 

1623 """ 

1624 

1625 root = "butlerRoot/" 

1626 """Root repository directory expected to be used in case useTempRoot=False. 

1627 Otherwise the root is set to a 20 characters long randomly generated string 

1628 during set-up. 

1629 """ 

1630 

1631 datastoreStr = [f"datastore={root}"] 

1632 """Contains all expected root locations in a format expected to be 

1633 returned by Butler stringification. 

1634 """ 

1635 

1636 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1637 """The expected format of the S3 Datastore string.""" 

1638 

1639 registryStr = "/gen3.sqlite3" 

1640 """Expected format of the Registry string.""" 

1641 

1642 mock_s3 = mock_s3() 

1643 """The mocked s3 interface from moto.""" 

1644 

1645 def genRoot(self): 

1646 """Returns a random string of len 20 to serve as a root 

1647 name for the temporary bucket repo. 

1648 

1649 This is equivalent to tempfile.mkdtemp as this is what self.root 

1650 becomes when useTempRoot is True. 

1651 """ 

1652 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1653 return rndstr + "/" 

1654 

1655 def setUp(self): 

1656 config = Config(self.configFile) 

1657 uri = ResourcePath(config[".datastore.datastore.root"]) 

1658 self.bucketName = uri.netloc 

1659 

1660 # Enable S3 mocking of tests. 

1661 self.mock_s3.start() 

1662 

1663 # Set up some fake credentials if real ones do not exist. 

1664 self.usingDummyCredentials = setAwsEnvCredentials() 

1665 

1666 if self.useTempRoot: 

1667 self.root = self.genRoot() 

1668 rooturi = f"s3://{self.bucketName}/{self.root}" 

1669 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1670 

1671 # Need a local folder to store the registry database. 

1672 self.reg_dir = makeTestTempDir(TESTDIR) 

1673 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1674 

1675 # Moto needs to know that we expect the bucket to exist 
1676 # (its name used to be the class attribute bucketName). 

1677 s3 = boto3.resource("s3") 

1678 s3.create_bucket(Bucket=self.bucketName) 

1679 

1680 self.datastoreStr = f"datastore={self.root}" 

1681 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1682 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1683 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1684 

1685 def tearDown(self): 

1686 s3 = boto3.resource("s3") 

1687 bucket = s3.Bucket(self.bucketName) 

1688 try: 

1689 bucket.objects.all().delete() 

1690 except botocore.exceptions.ClientError as e: 

1691 if e.response["Error"]["Code"] == "404": 

1692 # The bucket contents were not reachable; nothing to delete. 

1693 pass 

1694 else: 

1695 raise 

1696 

1697 bucket = s3.Bucket(self.bucketName) 

1698 bucket.delete() 

1699 

1700 # Stop the S3 mock. 

1701 self.mock_s3.stop() 

1702 

1703 # Unset any dummy credentials we may have set. 

1704 if self.usingDummyCredentials: 

1705 unsetAwsEnvCredentials() 

1706 

1707 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1708 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1709 

1710 if self.useTempRoot and os.path.exists(self.root): 

1711 shutil.rmtree(self.root, ignore_errors=True) 

1712 

1713 

1714@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1715# Mock required environment variables during tests 

1716@unittest.mock.patch.dict( 

1717 os.environ, 

1718 { 

1719 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1720 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1721 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1722 }, 

1723) 

1724class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1725 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1726 a local in-memory SqlRegistry. 

1727 """ 

1728 

1729 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1730 fullConfigKey = None 

1731 validationCanFail = True 

1732 

1733 serverName = "localhost" 

1734 """Name of the server that will be used in the tests. 

1735 """ 

1736 

1737 portNumber = 8080 

1738 """Port on which the webdav server listens. Automatically chosen 

1739 at setUpClass via the _getfreeport() method 

1740 """ 

1741 

1742 root = "butlerRoot/" 

1743 """Root repository directory expected to be used in case useTempRoot=False. 

1744 Otherwise the root is set to a 20 characters long randomly generated string 

1745 during set-up. 

1746 """ 

1747 

1748 datastoreStr = [f"datastore={root}"] 

1749 """Contains all expected root locations in a format expected to be 

1750 returned by Butler stringification. 

1751 """ 

1752 

1753 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1754 """The expected format of the WebdavDatastore string.""" 

1755 

1756 registryStr = "/gen3.sqlite3" 

1757 """Expected format of the Registry string.""" 

1758 

1759 serverThread = None 

1760 """Thread in which the local webdav server will run""" 

1761 

1762 stopWebdavServer = False 

1763 """This flag will cause the webdav server to 

1764 gracefully shut down when True 

1765 """ 

1766 

1767 def genRoot(self): 

1768 """Returns a random string of len 20 to serve as a root 

1769 name for the temporary bucket repo. 

1770 

1771 This is equivalent to tempfile.mkdtemp as this is what self.root 

1772 becomes when useTempRoot is True. 

1773 """ 

1774 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1775 return rndstr + "/" 

1776 

1777 @classmethod 

1778 def setUpClass(cls): 

1779 # Do the same as the parent class. 

1780 cls.storageClassFactory = StorageClassFactory() 

1781 cls.storageClassFactory.addFromConfig(cls.configFile) 

1782 

1783 cls.portNumber = cls._getfreeport() 

1784 # Run a local webdav server on which tests will be run 

1785 cls.serverThread = Thread( 

1786 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1787 ) 

1788 cls.serverThread.start() 

1789 # Crude wait for the server to start; setUp later verifies it via isWebdavEndpoint. 

1790 time.sleep(3) 

1791 

1792 @classmethod 

1793 def tearDownClass(cls): 

1794 # Ask for graceful shut down of the webdav server 

1795 cls.stopWebdavServer = True 

1796 # Wait for the thread to exit 

1797 cls.serverThread.join() 

1798 

1799 # Mock required environment variables during tests 

1800 @unittest.mock.patch.dict( 

1801 os.environ, 

1802 { 

1803 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1804 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1805 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1806 }, 

1807 ) 

1808 def setUp(self): 

1809 config = Config(self.configFile) 

1810 

1811 if self.useTempRoot: 

1812 self.root = self.genRoot() 

1813 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1814 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1815 

1816 # need local folder to store registry database 

1817 self.reg_dir = makeTestTempDir(TESTDIR) 

1818 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1819 

1820 self.datastoreStr = f"datastore={self.root}" 

1821 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1822 

1823 if not isWebdavEndpoint(self.rooturi): 

1824 raise OSError("Webdav server not running properly: cannot run tests.") 

1825 

1826 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1827 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1828 

1829 # Mock required environment variables during tests 

1830 @unittest.mock.patch.dict( 

1831 os.environ, 

1832 { 

1833 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1834 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1835 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1836 }, 

1837 ) 

1838 def tearDown(self): 

1839 # Clear temporary directory 

1840 ResourcePath(self.rooturi).remove() 

1841 ResourcePath(self.rooturi).session.close() 

1842 

1843 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1844 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1845 

1846 if self.useTempRoot and os.path.exists(self.root): 

1847 shutil.rmtree(self.root, ignore_errors=True) 

1848 

1849 def _serveWebdav(self, port: int, stopWebdavServer): 

1850 """Starts a local webdav-compatible HTTP server, 

1851 Listening on http://localhost:port 

1852 This server only runs when this test class is instantiated, 

1853 and then shuts down. Must be started is a separate thread. 

1854 

1855 Parameters 

1856 ---------- 

1857 port : `int` 

1858 The port number on which the server should listen 

1859 """ 

1860 root_path = gettempdir() 

1861 

1862 config = { 

1863 "host": "0.0.0.0", 

1864 "port": port, 

1865 "provider_mapping": {"/": root_path}, 

1866 "http_authenticator": {"domain_controller": None}, 

1867 "simple_dc": {"user_mapping": {"*": True}}, 

1868 "verbose": 0, 

1869 } 

1870 app = WsgiDAVApp(config) 

1871 

1872 server_args = { 

1873 "bind_addr": (config["host"], config["port"]), 

1874 "wsgi_app": app, 

1875 } 

1876 server = wsgi.Server(**server_args) 

1877 server.prepare() 

1878 

1879 try: 

1880 # Start the actual server in a separate thread 

1881 t = Thread(target=server.serve, daemon=True) 

1882 t.start() 

1883 # Watch stopWebdavServer and gracefully 
1884 # shut down the server when it returns True. 

1885 while True: 

1886 if stopWebdavServer(): 

1887 break 

1888 time.sleep(1) 

1889 except KeyboardInterrupt: 

1890 print("Caught Ctrl-C, shutting down...") 

1891 finally: 

1892 server.stop() 

1893 t.join() 

1894 
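# Added note: _getfreeport below is written without self/cls. Calling it
# as cls._getfreeport() in setUpClass still works because attribute access
# through the class yields the plain function, not a bound method.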

1895 def _getfreeport(): 

1896 """ 

1897 Determines a free port using sockets. 

1898 """ 

1899 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1900 free_socket.bind(("0.0.0.0", 0)) 

1901 free_socket.listen() 

1902 port = free_socket.getsockname()[1] 

1903 free_socket.close() 

1904 return port 
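# Added caveat: nothing reserves the port between the close() above and
# the webdav server's later bind, so a rare race with another process is
# possible; that is accepted here for test purposes.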

1905 

1906 

1907class PosixDatastoreTransfers(unittest.TestCase): 

1908 """Test data transfers between butlers. 

1909 

1910 Tests run with different dataset-ID managers: UUID to UUID and integer 
1911 to integer are tested. UUID to integer is not supported since we do not 
1912 currently want to allow that. Integer to UUID is supported with the 
1913 caveat that a UUID4 will be generated, which would be incorrect for raw 
1914 dataset types; the test ignores that. 

1915 """ 

1916 

1917 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1918 

1919 @classmethod 

1920 def setUpClass(cls): 

1921 cls.storageClassFactory = StorageClassFactory() 

1922 cls.storageClassFactory.addFromConfig(cls.configFile) 

1923 

1924 def setUp(self): 

1925 self.root = makeTestTempDir(TESTDIR) 

1926 self.config = Config(self.configFile) 

1927 

1928 def tearDown(self): 

1929 removeTestTempDir(self.root) 

1930 

1931 def create_butler(self, manager, label): 

1932 config = Config(self.configFile) 

1933 config["registry", "managers", "datasets"] = manager 

1934 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1935 

1936 def create_butlers(self, manager1, manager2): 

1937 self.source_butler = self.create_butler(manager1, "1") 

1938 self.target_butler = self.create_butler(manager2, "2") 

1939 

1940 def testTransferUuidToUuid(self): 

1941 self.create_butlers( 

1942 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1943 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1944 ) 

1945 # Setting id_gen_map should have no effect here 

1946 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1947 

1948 def testTransferIntToInt(self): 

1949 self.create_butlers( 

1950 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1951 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1952 ) 

1953 # Integer dataset IDs only allow UNIQUE ID generation, so no id_gen_map. 

1954 self.assertButlerTransfers() 

1955 

1956 def testTransferIntToUuid(self): 

1957 self.create_butlers( 

1958 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1959 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1960 ) 

1961 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1962 

1963 def testTransferMissing(self): 

1964 """Test transfers where datastore records are missing. 

1965 

1966 This is how execution butler works. 

1967 """ 

1968 self.create_butlers( 

1969 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1970 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1971 ) 

1972 

1973 # Configure the source butler to allow trust. 

1974 self.source_butler.datastore.trustGetRequest = True 

1975 

1976 self.assertButlerTransfers(purge=True) 

1977 

1978 def testTransferMissingDisassembly(self): 

1979 """Test transfers where datastore records are missing. 

1980 

1981 This is how execution butler works. 

1982 """ 

1983 self.create_butlers( 

1984 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1985 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1986 ) 

1987 

1988 # Configure the source butler to allow trust. 

1989 self.source_butler.datastore.trustGetRequest = True 

1990 

1991 # Test disassembly. 

1992 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1993 

1994 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1995 """Test that a run can be transferred to another butler.""" 

1996 

1997 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1998 datasetTypeName = "random_data" 

1999 

2000 # The test will create 3 collections and we will want to transfer 
2001 # two of those three. 

2002 runs = ["run1", "run2", "other"] 

2003 

2004 # Also want to use two different dataset types to ensure that 

2005 # grouping works. 

2006 datasetTypeNames = ["random_data", "random_data_2"] 

2007 

2008 # Create the run collections in the source butler. 

2009 for run in runs: 

2010 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2011 

2012 # Create dimensions in both butlers (transfer will not create them). 

2013 n_exposures = 30 

2014 for butler in (self.source_butler, self.target_butler): 

2015 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2016 butler.registry.insertDimensionData( 

2017 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2018 ) 

2019 butler.registry.insertDimensionData( 

2020 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2021 ) 

2022 

2023 for i in range(n_exposures): 

2024 butler.registry.insertDimensionData( 

2025 "exposure", 

2026 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2027 ) 

2028 

2029 # Create dataset types in the source butler. 

2030 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2031 for datasetTypeName in datasetTypeNames: 

2032 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2033 self.source_butler.registry.registerDatasetType(datasetType) 

2034 

2035 # Write a dataset to an unrelated run -- this will ensure that 

2036 # we are rewriting integer dataset ids in the target if necessary. 

2037 # This is not relevant for UUID dataset IDs. 

2038 run = "distraction" 

2039 butler = Butler(butler=self.source_butler, run=run) 

2040 butler.put( 

2041 makeExampleMetrics(), 

2042 datasetTypeName, 

2043 exposure=1, 

2044 instrument="DummyCamComp", 

2045 physical_filter="d-r", 

2046 ) 

2047 

2048 # Write some example metrics to the source 

2049 butler = Butler(butler=self.source_butler) 

2050 

2051 # Set of DatasetRefs that should be in the list of refs to transfer 

2052 # but which will not be transferred. 

2053 deleted = set() 

2054 

2055 n_expected = 20 # Number of datasets expected to be transferred 

2056 source_refs = [] 

2057 for i in range(n_exposures): 

2058 # Put a third of datasets into each collection, only retain 

2059 # two thirds. 

2060 index = i % 3 

2061 run = runs[index] 

2062 datasetTypeName = datasetTypeNames[i % 2] 

2063 

2064 metric_data = { 

2065 "summary": {"counter": i}, 

2066 "output": {"text": "metric"}, 

2067 "data": [2 * x for x in range(i)], 

2068 } 

2069 metric = MetricsExample(**metric_data) 

2070 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2071 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2072 

2073 # Remove the datastore record using low-level API 

2074 if purge: 

2075 # Remove records for a fraction. 

2076 if index == 1: 

2077 

2078 # For one of these delete the file as well. 

2079 # This allows the "missing" code to filter the 

2080 # file out. 

2081 if not deleted: 

2082 primary, uris = butler.datastore.getURIs(ref) 

2083 if primary: 

2084 primary.remove() 

2085 for uri in uris.values(): 

2086 uri.remove() 

2087 n_expected -= 1 

2088 deleted.add(ref) 

2089 

2090 # Remove the datastore record. 

2091 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2092 

2093 if index < 2: 

2094 source_refs.append(ref) 

2095 if ref not in deleted: 

2096 new_metric = butler.get(ref.unresolved(), collections=run) 

2097 self.assertEqual(new_metric, metric) 

2098 

2099 # Create some bad dataset types to ensure we check for inconsistent 

2100 # definitions. 

2101 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2102 for datasetTypeName in datasetTypeNames: 

2103 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2104 self.target_butler.registry.registerDatasetType(datasetType) 

2105 with self.assertRaises(ConflictingDefinitionError): 

2106 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2107 # And remove the bad definitions. 

2108 for datasetTypeName in datasetTypeNames: 

2109 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2110 

2111 # Transfer without creating dataset types should fail. 

2112 with self.assertRaises(KeyError): 

2113 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2114 

2115 # Now transfer them to the second butler 

2116 with self.assertLogs(level=logging.DEBUG) as cm: 

2117 transferred = self.target_butler.transfer_from( 

2118 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2119 ) 

2120 self.assertEqual(len(transferred), n_expected) 

2121 log_output = ";".join(cm.output) 

2122 self.assertIn("found in datastore for chunk", log_output) 

2123 self.assertIn("Creating output run", log_output) 

2124 

2125 # Do the transfer twice to ensure that it will do nothing extra. 

2126 # Only do this if purge=True because it does not work for int 

2127 # dataset_id. 

2128 if purge: 

2129 # This should not need to register dataset types. 

2130 transferred = self.target_butler.transfer_from( 

2131 self.source_butler, source_refs, id_gen_map=id_gen_map 

2132 ) 

2133 self.assertEqual(len(transferred), n_expected) 

2134 

2135 # Also do an explicit low-level transfer to trigger some 

2136 # edge cases. 

2137 with self.assertLogs(level=logging.DEBUG) as cm: 

2138 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2139 log_output = ";".join(cm.output) 

2140 self.assertIn("no file artifacts exist", log_output) 

2141 

2142 with self.assertRaises(TypeError): 

2143 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2144 

2145 with self.assertRaises(ValueError): 

2146 self.target_butler.datastore.transfer_from( 

2147 self.source_butler.datastore, source_refs, transfer="split" 

2148 ) 

2149 

2150 # Now try to get the same refs from the new butler. 

2151 for ref in source_refs: 

2152 if ref not in deleted: 

2153 unresolved_ref = ref.unresolved() 

2154 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2155 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2156 self.assertEqual(new_metric, old_metric) 

2157 

2158 # Now prune the run2 collection and create a CHAINED collection in its place. 

2159 # This should block the transfer. 

2160 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2161 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2162 with self.assertRaises(CollectionTypeError): 

2163 # Re-importing the run1 datasets can be problematic if they 

2164 # use integer IDs so filter those out. 

2165 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2166 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2167 

2168 

2169 if __name__ == "__main__": 

2170 unittest.main()