Coverage for tests/test_butler.py: 16%

1138 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest

# unittest.mock is used below; "import unittest" alone does not guarantee
# that the mock submodule has been loaded, so import it explicitly.
import unittest.mock

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

from tempfile import gettempdir
from threading import Thread

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import isWebdavEndpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
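    """Make an example MetricsExample object for use in tests."""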

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests against different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
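        """Check that each named component of ``datasetRef`` can be retrieved
        both via a regular get and via a deferred handle, and that each
        result matches the corresponding attribute of ``reference``.
        """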

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
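        """Construct a Butler for the given run and register the dataset type
        and instrument/visit dimension records needed by the put/get tests.
        """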

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
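        """Put and get a dataset through the supported argument styles,
        exercising components, artifact retrieval, parameters, and dataset
        removal. The populated butler is returned for use by callers.
        """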

        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run="ingest")
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
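        """Test a composite storage class that supports component reads but
        is not disassembled on write.
        """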

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
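        """Test a composite storage class that file datastores disassemble
        into per-component artifacts.
        """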

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
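        """Test ingesting external files, both one dataset per file and
        multiple datasets sharing a single file.
        """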

        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
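        """Test pruneCollection for RUN, TAGGED, and CHAINED collections,
        checking registry contents and datastore existence at each step.
        """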

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
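        """Test dataset type registration, component queries, and validation
        of the butler configuration against the registered types.
        """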

        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
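        """Test that an error raised inside a transaction rolls back the
        dimension inserts, the registry entry, and the datastore write.
        """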

        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)

class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
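        """Test that file templates control where datasets are written and
        that a template yielding non-unique filenames fails validation.
        """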

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Test exporting to a temp directory and importing back into a new
        temp directory repo. This test does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
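        """Test removeRuns, checking that the unstore flag controls whether
        the underlying artifacts are deleted.
        """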

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())

class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

    def testPruneDatasets(self):

1322 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1323 butler = Butler(self.tmpConfigFile, writeable=True) 

1324 # Load registry data with dimensions to hang datasets off of. 

1325 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1326 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1327 # Add some RUN-type collections. 

1328 run1 = "run1" 

1329 butler.registry.registerRun(run1) 

1330 run2 = "run2" 

1331 butler.registry.registerRun(run2) 

1332 # Put some datasets. ref1 and ref2 have the same data ID but are in 

1333 # different runs; ref3 has a different data ID. 

1334 metric = makeExampleMetrics() 

1335 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1336 datasetType = self.addDatasetType( 

1337 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1338 ) 

1339 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1340 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1341 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1342 

1343 # Simple prune. 

1344 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1345 with self.assertRaises(LookupError): 

1346 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1347 

1348 # Put data back. 

1349 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1350 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1351 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1352 

1353 # Check that in normal mode, deleting the datastore record first 

1354 # means that trashing will not touch the file. 

1355 uri1 = butler.datastore.getURI(ref1) 

1356 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1357 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1358 butler.datastore.trash(ref1) 

1359 butler.datastore.emptyTrash() 

1360 self.assertTrue(uri1.exists()) 

1361 uri1.remove() # Clean it up. 

1362 

1363 # Simulate an execution-butler setup by deleting the datastore 

1364 # record but keeping the file around and enabling trust mode. 

1365 butler.datastore.trustGetRequest = True 

1366 uri2 = butler.datastore.getURI(ref2) 

1367 uri3 = butler.datastore.getURI(ref3) 

1368 self.assertTrue(uri2.exists()) 

1369 self.assertTrue(uri3.exists()) 

1370 

1371 # Remove the datastore record. 

1372 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1373 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1374 self.assertTrue(uri2.exists()) 

1375 butler.datastore.trash([ref2, ref3]) 

1376 # Immediate removal of the ref2 file. 

1377 self.assertFalse(uri2.exists()) 

1378 # But ref3 has to wait for the trash to be emptied. 

1379 self.assertTrue(uri3.exists()) 

1380 butler.datastore.emptyTrash() 

1381 self.assertFalse(uri3.exists()) 

1382 

1383 # Clear out the datasets from registry. 

1384 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 
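
The deletion model exercised above, restated as a sketch with the same names: removal is two-phase unless trust mode finds no record left to mark.

    # Normal mode: trash() marks the dataset; only emptyTrash() deletes files.
    butler.datastore.trash(ref3)
    butler.datastore.emptyTrash()
    # Trust mode (trustGetRequest=True): if the datastore record is already
    # gone, trash() deletes the file artifact immediately, as seen for ref2.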

1385 

1386 def testPytypePutCoercion(self): 

1387 """Test python type coercion on Butler.get and put.""" 

1388 

1389 # Store some data with the normal example storage class. 

1390 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1391 datasetTypeName = "test_metric" 

1392 butler, _ = self.create_butler("ingest", storageClass, datasetTypeName) 

1393 

1394 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1395 

1396 # Put a dict; this should coerce to a MetricsExample. 

1397 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1398 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1399 test_metric = butler.getDirect(metric_ref) 

1400 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1401 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1402 self.assertEqual(test_metric.output, test_dict["output"]) 

1403 

1404 # Check that the put still works if a DatasetType is given with 

1405 # a definition matching this python type. 

1406 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1407 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1408 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1409 self.assertEqual(metric2_ref.datasetType, registry_type) 

1410 

1411 # The get will return the type expected by registry. 

1412 test_metric2 = butler.getDirect(metric2_ref) 

1413 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1414 

1415 # Make a new DatasetRef with the compatible but different DatasetType. 

1416 # This should now return a dict. 

1417 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1418 test_dict2 = butler.getDirect(new_ref) 

1419 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1420 

1421 # Get it again with the wrong dataset type definition using get() 

1422 # rather than getDirect(). This should be consistent with getDirect() 

1423 # behavior and return the type of the DatasetType. 

1424 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1425 self.assertEqual(get_full_type_name(test_dict3), "dict") 
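
The rule these assertions pin down, in short: the storage class carried by the DatasetType used for the read, not the registry definition, decides the returned Python type. A sketch reusing the names above:

    # this_type uses the dict-based storage class, so reading through a ref
    # built from it coerces the stored MetricsExample back into a dict.
    dict_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
    assert type(butler.getDirect(dict_ref)) is dict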

1426 

1427 def testPytypeCoercion(self): 

1428 """Test python type coercion on Butler.get and put.""" 

1429 

1430 # Store some data with the normal example storage class. 

1431 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1432 datasetTypeName = "test_metric" 

1433 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1434 

1435 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1436 metric = butler.get(datasetTypeName, dataId=dataId) 

1437 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1438 

1439 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1440 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1441 

1442 # Now need to hack the registry dataset type definition. 

1443 # There is no API for this. 

1444 manager = butler.registry._managers.datasets 

1445 manager._db.update( 

1446 manager._static.dataset_type, 

1447 {"name": datasetTypeName}, 

1448 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1449 ) 

1450 

1451 # Force reset of dataset type cache 

1452 butler.registry.refresh() 

1453 

1454 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1455 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1456 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1457 

1458 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1459 self.assertNotEqual(type(metric_model), type(metric)) 

1460 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1461 

1462 # Put the model and read it back to show that everything now 

1463 # works as normal. 

1464 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1465 metric_model_new = butler.get(metric_ref) 

1466 self.assertEqual(metric_model_new, metric_model) 

1467 

1468 # Hack the storage class again to something that will fail on the 

1469 # get with no conversion class. 

1470 manager._db.update( 

1471 manager._static.dataset_type, 

1472 {"name": datasetTypeName}, 

1473 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1474 ) 

1475 butler.registry.refresh() 

1476 

1477 with self.assertRaises(ValueError): 

1478 butler.get(datasetTypeName, dataId=dataId) 

1479 

1480 

1481class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1482 """InMemoryDatastore specialization of a butler""" 

1483 

1484 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1485 fullConfigKey = None 

1486 useTempRoot = False 

1487 validationCanFail = False 

1488 datastoreStr = ["datastore='InMemory"] 

1489 datastoreName = ["InMemoryDatastore@"] 

1490 registryStr = "/gen3.sqlite3" 

1491 

1492 def testIngest(self): 

1493 pass 

1494 

1495 

1496class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1497 """PosixDatastore specialization""" 

1498 

1499 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1500 fullConfigKey = ".datastore.datastores.1.formatters" 

1501 validationCanFail = True 

1502 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1503 datastoreName = [ 

1504 "InMemoryDatastore@", 

1505 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1506 "SecondDatastore", 

1507 ] 

1508 registryStr = "/gen3.sqlite3" 

1509 

1510 

1511class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1512 """Test that a yaml file in one location can refer to a root in another.""" 

1513 

1514 datastoreStr = ["dir1"] 

1515 # Disable the makeRepo test since we are deliberately not using 

1516 # butler.yaml as the config name. 

1517 fullConfigKey = None 

1518 

1519 def setUp(self): 

1520 self.root = makeTestTempDir(TESTDIR) 

1521 

1522 # Make a new repository in one place 

1523 self.dir1 = os.path.join(self.root, "dir1") 

1524 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1525 

1526 # Move the yaml file to a different place and add a "root" 

1527 self.dir2 = os.path.join(self.root, "dir2") 

1528 os.makedirs(self.dir2, exist_ok=True) 

1529 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1530 config = Config(configFile1) 

1531 config["root"] = self.dir1 

1532 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1533 config.dumpToUri(configFile2) 

1534 os.remove(configFile1) 

1535 self.tmpConfigFile = configFile2 

1536 

1537 def testFileLocations(self): 

1538 self.assertNotEqual(self.dir1, self.dir2) 

1539 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1540 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1541 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1542 

1543 

1544class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1545 """Test that a config file created by makeRepo outside of repo works.""" 

1546 

1547 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1548 

1549 def setUp(self): 

1550 self.root = makeTestTempDir(TESTDIR) 

1551 self.root2 = makeTestTempDir(TESTDIR) 

1552 

1553 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1554 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1555 

1556 def tearDown(self): 

1557 if os.path.exists(self.root2): 

1558 shutil.rmtree(self.root2, ignore_errors=True) 

1559 super().tearDown() 

1560 

1561 def testConfigExistence(self): 

1562 c = Config(self.tmpConfigFile) 

1563 uri_config = ResourcePath(c["root"]) 

1564 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1565 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1566 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1567 

1568 def testPutGet(self): 

1569 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1570 self.runPutGetTest(storageClass, "test_metric") 

1571 

1572 

1573class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1574 """Test that a config file created by makeRepo outside of repo works.""" 

1575 

1576 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1577 

1578 def setUp(self): 

1579 self.root = makeTestTempDir(TESTDIR) 

1580 self.root2 = makeTestTempDir(TESTDIR) 

1581 

1582 self.tmpConfigFile = self.root2 

1583 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1584 

1585 def testConfigExistence(self): 

1586 # Append the yaml file else Config constructor does not know the file 

1587 # type. 

1588 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1589 super().testConfigExistence() 

1590 

1591 

1592class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1593 """Test that a config file created by makeRepo outside of repo works.""" 

1594 

1595 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1596 

1597 def setUp(self): 

1598 self.root = makeTestTempDir(TESTDIR) 

1599 self.root2 = makeTestTempDir(TESTDIR) 

1600 

1601 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1602 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1603 

1604 

1605@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1606class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1607 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1608 a local in-memory SqlRegistry. 

1609 """ 

1610 

1611 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1612 fullConfigKey = None 

1613 validationCanFail = True 

1614 

1615 bucketName = "anybucketname" 

1616 """Name of the Bucket that will be used in the tests. The name is read from 

1617 the config file used with the tests during set-up. 

1618 """ 

1619 

1620 root = "butlerRoot/" 

1621 """Root repository directory expected to be used in case useTempRoot=False. 

1622 Otherwise the root is set to a randomly generated 20-character string 

1623 during set-up. 

1624 """ 

1625 

1626 datastoreStr = [f"datastore={root}"] 

1627 """Contains all expected root locations in a format expected to be 

1628 returned by Butler stringification. 

1629 """ 

1630 

1631 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1632 """The expected format of the S3 Datastore string.""" 

1633 

1634 registryStr = "/gen3.sqlite3" 

1635 """Expected format of the Registry string.""" 

1636 

1637 mock_s3 = mock_s3() 

1638 """The mocked s3 interface from moto.""" 

1639 

1640 def genRoot(self): 

1641 """Returns a random string of len 20 to serve as a root 

1642 name for the temporary bucket repo. 

1643 

1644 This is equivalent to tempfile.mkdtemp as this is what self.root 

1645 becomes when useTempRoot is True. 

1646 """ 

1647 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1648 return rndstr + "/" 

1649 

1650 def setUp(self): 

1651 config = Config(self.configFile) 

1652 uri = ResourcePath(config[".datastore.datastore.root"]) 

1653 self.bucketName = uri.netloc 

1654 

1655 # Enable S3 mocking of tests. 

1656 self.mock_s3.start() 

1657 

1658 # set up some fake credentials if they do not exist 

1659 self.usingDummyCredentials = setAwsEnvCredentials() 

1660 

1661 if self.useTempRoot: 

1662 self.root = self.genRoot() 

1663 rooturi = f"s3://{self.bucketName}/{self.root}" 

1664 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1665 

1666 # need local folder to store registry database 

1667 self.reg_dir = makeTestTempDir(TESTDIR) 

1668 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1669 

1670 # MOTO needs to know that we expect Bucket bucketname to exist 

1671 # (this used to be the class attribute bucketName) 

1672 s3 = boto3.resource("s3") 

1673 s3.create_bucket(Bucket=self.bucketName) 

1674 

1675 self.datastoreStr = f"datastore={self.root}" 

1676 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1677 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1678 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1679 

1680 def tearDown(self): 

1681 s3 = boto3.resource("s3") 

1682 bucket = s3.Bucket(self.bucketName) 

1683 try: 

1684 bucket.objects.all().delete() 

1685 except botocore.exceptions.ClientError as e: 

1686 if e.response["Error"]["Code"] == "404": 

1687 # the key was not reachable - pass 

1688 pass 

1689 else: 

1690 raise 

1691 

1692 bucket = s3.Bucket(self.bucketName) 

1693 bucket.delete() 

1694 

1695 # Stop the S3 mock. 

1696 self.mock_s3.stop() 

1697 

1698 # unset any potentially set dummy credentials 

1699 if self.usingDummyCredentials: 

1700 unsetAwsEnvCredentials() 

1701 

1702 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1703 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1704 

1705 if self.useTempRoot and os.path.exists(self.root): 

1706 shutil.rmtree(self.root, ignore_errors=True) 
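
The moto-backed lifecycle this fixture relies on, reduced to its shape (bucket name hypothetical):

    mock = mock_s3()
    mock.start()
    boto3.resource("s3").create_bucket(Bucket="demo-bucket")
    # ... point the FileDatastore root at s3://demo-bucket/<root>/ and test ...
    mock.stop()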

1707 

1708 

1709@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1710# Mock required environment variables during tests 

1711@unittest.mock.patch.dict( 

1712 os.environ, 

1713 { 

1714 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1715 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1716 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1717 }, 

1718) 

1719class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1720 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1721 a local in-memory SqlRegistry. 

1722 """ 

1723 

1724 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1725 fullConfigKey = None 

1726 validationCanFail = True 

1727 

1728 serverName = "localhost" 

1729 """Name of the server that will be used in the tests. 

1730 """ 

1731 

1732 portNumber = 8080 

1733 """Port on which the webdav server listens. Automatically chosen 

1734 at setUpClass via the _getfreeport() method. 

1735 """ 

1736 

1737 root = "butlerRoot/" 

1738 """Root repository directory expected to be used in case useTempRoot=False. 

1739 Otherwise the root is set to a randomly generated 20-character string 

1740 during set-up. 

1741 """ 

1742 

1743 datastoreStr = [f"datastore={root}"] 

1744 """Contains all expected root locations in a format expected to be 

1745 returned by Butler stringification. 

1746 """ 

1747 

1748 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1749 """The expected format of the WebdavDatastore string.""" 

1750 

1751 registryStr = "/gen3.sqlite3" 

1752 """Expected format of the Registry string.""" 

1753 

1754 serverThread = None 

1755 """Thread in which the local webdav server will run""" 

1756 

1757 stopWebdavServer = False 

1758 """This flag will cause the webdav server to 

1759 gracefully shut down when True 

1760 """ 

1761 

1762 def genRoot(self): 

1763 """Returns a random string of len 20 to serve as a root 

1764 name for the temporary bucket repo. 

1765 

1766 This is equivalent to tempfile.mkdtemp as this is what self.root 

1767 becomes when useTempRoot is True. 

1768 """ 

1769 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1770 return rndstr + "/" 

1771 

1772 @classmethod 

1773 def setUpClass(cls): 

1774 # Do the same as the inherited class. 

1775 cls.storageClassFactory = StorageClassFactory() 

1776 cls.storageClassFactory.addFromConfig(cls.configFile) 

1777 

1778 cls.portNumber = cls._getfreeport() 

1779 # Run a local webdav server on which tests will be run 

1780 cls.serverThread = Thread( 

1781 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1782 ) 

1783 cls.serverThread.start() 

1784 # Wait for it to start 

1785 time.sleep(3) 

1786 

1787 @classmethod 

1788 def tearDownClass(cls): 

1789 # Ask for graceful shut down of the webdav server 

1790 cls.stopWebdavServer = True 

1791 # Wait for the thread to exit 

1792 cls.serverThread.join() 

1793 

1794 # Mock required environment variables during tests 

1795 @unittest.mock.patch.dict( 

1796 os.environ, 

1797 { 

1798 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1799 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1800 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1801 }, 

1802 ) 

1803 def setUp(self): 

1804 config = Config(self.configFile) 

1805 

1806 if self.useTempRoot: 

1807 self.root = self.genRoot() 

1808 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1809 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1810 

1811 # need local folder to store registry database 

1812 self.reg_dir = makeTestTempDir(TESTDIR) 

1813 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1814 

1815 self.datastoreStr = f"datastore={self.root}" 

1816 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1817 

1818 if not isWebdavEndpoint(self.rooturi): 

1819 raise OSError("Webdav server not running properly: cannot run tests.") 

1820 

1821 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1822 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1823 

1824 # Mock required environment variables during tests 

1825 @unittest.mock.patch.dict( 

1826 os.environ, 

1827 { 

1828 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1829 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1830 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1831 }, 

1832 ) 

1833 def tearDown(self): 

1834 # Clear temporary directory 

1835 ResourcePath(self.rooturi).remove() 

1836 ResourcePath(self.rooturi).session.close() 

1837 

1838 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1839 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1840 

1841 if self.useTempRoot and os.path.exists(self.root): 

1842 shutil.rmtree(self.root, ignore_errors=True) 

1843 

1844 def _serveWebdav(self, port: int, stopWebdavServer): 

1845 """Starts a local webdav-compatible HTTP server, 

1846 listening on http://localhost:port. 

1847 This server only runs while this test class is instantiated, 

1848 and then shuts down. Must be started in a separate thread. 

1849 

1850 Parameters 

1851 ---------- 

1852 port : `int` 

1853 The port number on which the server should listen. 

1854 """ 

1855 root_path = gettempdir() 

1856 

1857 config = { 

1858 "host": "0.0.0.0", 

1859 "port": port, 

1860 "provider_mapping": {"/": root_path}, 

1861 "http_authenticator": {"domain_controller": None}, 

1862 "simple_dc": {"user_mapping": {"*": True}}, 

1863 "verbose": 0, 

1864 } 

1865 app = WsgiDAVApp(config) 

1866 

1867 server_args = { 

1868 "bind_addr": (config["host"], config["port"]), 

1869 "wsgi_app": app, 

1870 } 

1871 server = wsgi.Server(**server_args) 

1872 server.prepare() 

1873 

1874 try: 

1875 # Start the actual server in a separate thread 

1876 t = Thread(target=server.serve, daemon=True) 

1877 t.start() 

1878 # watch stopWebdavServer, and gracefully 

1879 # shut down the server when True 

1880 while True: 

1881 if stopWebdavServer(): 

1882 break 

1883 time.sleep(1) 

1884 except KeyboardInterrupt: 

1885 print("Caught Ctrl-C, shutting down...") 

1886 finally: 

1887 server.stop() 

1888 t.join() 
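
The shutdown handshake used above, as a self-contained sketch: the worker polls a callable, and the owner flips the flag that the callable closes over.

    import time
    from threading import Thread

    stop = {"requested": False}

    def worker(should_stop):
        # Poll the flag roughly once per interval, then exit gracefully.
        while not should_stop():
            time.sleep(0.1)

    t = Thread(target=worker, args=(lambda: stop["requested"],), daemon=True)
    t.start()
    stop["requested"] = True  # request graceful shutdown
    t.join()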

1889 

1890 def _getfreeport(): 

1891 """ 

1892 Determines a free port using sockets. 

1893 """ 

1894 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1895 free_socket.bind(("0.0.0.0", 0)) 

1896 free_socket.listen() 

1897 port = free_socket.getsockname()[1] 

1898 free_socket.close() 

1899 return port 
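
The same bind-to-port-zero trick as a self-contained sketch; binding port 0 asks the OS for any free port, so there is a small window in which another process could grab it before the server binds it again.

    import socket

    def free_port() -> int:
        # Read the assigned port back before closing the socket.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("0.0.0.0", 0))
            return s.getsockname()[1]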

1900 

1901 

1902class PosixDatastoreTransfers(unittest.TestCase): 

1903 """Test data transfers between butlers. 

1904 

1905 Tests cover different dataset ID managers. UUID to UUID and integer to 

1906 integer are tested. UUID to integer is not supported since we do not 

1907 currently want to allow that. Integer to UUID is supported, with the 

1908 caveat that a UUID4 will be generated, which will be incorrect for raw 

1909 dataset types; the tests ignore that. 

1910 """ 

1911 

1912 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1913 

1914 @classmethod 

1915 def setUpClass(cls): 

1916 cls.storageClassFactory = StorageClassFactory() 

1917 cls.storageClassFactory.addFromConfig(cls.configFile) 

1918 

1919 def setUp(self): 

1920 self.root = makeTestTempDir(TESTDIR) 

1921 self.config = Config(self.configFile) 

1922 

1923 def tearDown(self): 

1924 removeTestTempDir(self.root) 

1925 

1926 def create_butler(self, manager, label): 

1927 config = Config(self.configFile) 

1928 config["registry", "managers", "datasets"] = manager 

1929 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1930 

1931 def create_butlers(self, manager1, manager2): 

1932 self.source_butler = self.create_butler(manager1, "1") 

1933 self.target_butler = self.create_butler(manager2, "2") 

1934 

1935 def testTransferUuidToUuid(self): 

1936 self.create_butlers( 

1937 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1938 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1939 ) 

1940 # Setting id_gen_map should have no effect here 

1941 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1942 

1943 def testTransferIntToInt(self): 

1944 self.create_butlers( 

1945 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1946 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1947 ) 

1948 # int dataset ID only allows UNIQUE 

1949 self.assertButlerTransfers() 

1950 

1951 def testTransferIntToUuid(self): 

1952 self.create_butlers( 

1953 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1954 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1955 ) 

1956 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1957 

1958 def testTransferMissing(self): 

1959 """Test transfers where datastore records are missing. 

1960 

1961 This is how execution butler works. 

1962 """ 

1963 self.create_butlers( 

1964 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1965 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1966 ) 

1967 

1968 # Configure the source butler to allow trust. 

1969 self.source_butler.datastore.trustGetRequest = True 

1970 

1971 self.assertButlerTransfers(purge=True) 

1972 

1973 def testTransferMissingDisassembly(self): 

1974 """Test transfers where datastore records are missing. 

1975 

1976 This is how execution butler works. 

1977 """ 

1978 self.create_butlers( 

1979 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1980 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1981 ) 

1982 

1983 # Configure the source butler to allow trust. 

1984 self.source_butler.datastore.trustGetRequest = True 

1985 

1986 # Test disassembly. 

1987 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1988 

1989 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1990 """Test that a run can be transferred to another butler.""" 

1991 

1992 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1993 datasetTypeName = "random_data" 

1994 

1995 # The test will create 3 collections, and we will want to transfer 

1996 # two of those three. 

1997 runs = ["run1", "run2", "other"] 

1998 

1999 # Also want to use two different dataset types to ensure that 

2000 # grouping works. 

2001 datasetTypeNames = ["random_data", "random_data_2"] 

2002 

2003 # Create the run collections in the source butler. 

2004 for run in runs: 

2005 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2006 

2007 # Create dimensions in both butlers (transfer will not create them). 

2008 n_exposures = 30 

2009 for butler in (self.source_butler, self.target_butler): 

2010 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2011 butler.registry.insertDimensionData( 

2012 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2013 ) 

2014 butler.registry.insertDimensionData( 

2015 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2016 ) 

2017 

2018 for i in range(n_exposures): 

2019 butler.registry.insertDimensionData( 

2020 "exposure", 

2021 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2022 ) 

2023 

2024 # Create dataset types in the source butler. 

2025 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2026 for datasetTypeName in datasetTypeNames: 

2027 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2028 self.source_butler.registry.registerDatasetType(datasetType) 

2029 

2030 # Write a dataset to an unrelated run -- this will ensure that 

2031 # we are rewriting integer dataset ids in the target if necessary. 

2032 # Will not be relevant for UUID. 

2033 run = "distraction" 

2034 butler = Butler(butler=self.source_butler, run=run) 

2035 butler.put( 

2036 makeExampleMetrics(), 

2037 datasetTypeName, 

2038 exposure=1, 

2039 instrument="DummyCamComp", 

2040 physical_filter="d-r", 

2041 ) 

2042 

2043 # Write some example metrics to the source 

2044 butler = Butler(butler=self.source_butler) 

2045 

2046 # Set of DatasetRefs that should be in the list of refs to transfer 

2047 # but which will not be transferred. 

2048 deleted = set() 

2049 

2050 n_expected = 20 # Number of datasets expected to be transferred 
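# The arithmetic: 30 exposures cycle through runs[i % 3]; only indexes 0
# and 1 ("run1" and "run2") are retained below, so 30 * 2/3 = 20. Purge
# mode later subtracts one for the artifact it deletes outright.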

2051 source_refs = [] 

2052 for i in range(n_exposures): 

2053 # Put a third of the datasets into each collection; only retain 

2054 # two thirds. 

2055 index = i % 3 

2056 run = runs[index] 

2057 datasetTypeName = datasetTypeNames[i % 2] 

2058 

2059 metric_data = { 

2060 "summary": {"counter": i}, 

2061 "output": {"text": "metric"}, 

2062 "data": [2 * x for x in range(i)], 

2063 } 

2064 metric = MetricsExample(**metric_data) 

2065 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2066 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2067 

2068 # Remove the datastore record using low-level API 

2069 if purge: 

2070 # Remove records for a fraction. 

2071 if index == 1: 

2072 

2073 # For one of these delete the file as well. 

2074 # This allows the "missing" code to filter the 

2075 # file out. 

2076 if not deleted: 

2077 primary, uris = butler.datastore.getURIs(ref) 

2078 if primary: 

2079 primary.remove() 

2080 for uri in uris.values(): 

2081 uri.remove() 

2082 n_expected -= 1 

2083 deleted.add(ref) 

2084 

2085 # Remove the datastore record. 

2086 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2087 

2088 if index < 2: 

2089 source_refs.append(ref) 

2090 if ref not in deleted: 

2091 new_metric = butler.get(ref.unresolved(), collections=run) 

2092 self.assertEqual(new_metric, metric) 

2093 

2094 # Create some bad dataset types to ensure we check for inconsistent 

2095 # definitions. 

2096 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2097 for datasetTypeName in datasetTypeNames: 

2098 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2099 self.target_butler.registry.registerDatasetType(datasetType) 

2100 with self.assertRaises(ConflictingDefinitionError): 

2101 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2102 # And remove the bad definitions. 

2103 for datasetTypeName in datasetTypeNames: 

2104 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2105 

2106 # Transfer without creating dataset types should fail. 

2107 with self.assertRaises(KeyError): 

2108 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2109 

2110 # Now transfer them to the second butler 

2111 with self.assertLogs(level=logging.DEBUG) as cm: 

2112 transferred = self.target_butler.transfer_from( 

2113 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2114 ) 

2115 self.assertEqual(len(transferred), n_expected) 

2116 log_output = ";".join(cm.output) 

2117 self.assertIn("found in datastore for chunk", log_output) 

2118 self.assertIn("Creating output run", log_output) 

2119 

2120 # Do the transfer twice to ensure that it will do nothing extra. 

2121 # Only do this if purge=True because it does not work for int 

2122 # dataset_id. 

2123 if purge: 

2124 # This should not need to register dataset types. 

2125 transferred = self.target_butler.transfer_from( 

2126 self.source_butler, source_refs, id_gen_map=id_gen_map 

2127 ) 

2128 self.assertEqual(len(transferred), n_expected) 

2129 

2130 # Also do an explicit low-level transfer to trigger some 

2131 # edge cases. 

2132 with self.assertLogs(level=logging.DEBUG) as cm: 

2133 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2134 log_output = ";".join(cm.output) 

2135 self.assertIn("no file artifacts exist", log_output) 

2136 

2137 with self.assertRaises(TypeError): 

2138 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2139 

2140 with self.assertRaises(ValueError): 

2141 self.target_butler.datastore.transfer_from( 

2142 self.source_butler.datastore, source_refs, transfer="split" 

2143 ) 

2144 

2145 # Now try to get the same refs from the new butler. 

2146 for ref in source_refs: 

2147 if ref not in deleted: 

2148 unresolved_ref = ref.unresolved() 

2149 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2150 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2151 self.assertEqual(new_metric, old_metric) 

2152 

2153 # Now prune run2 collection and create instead a CHAINED collection. 

2154 # This should block the transfer. 

2155 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2156 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2157 with self.assertRaises(CollectionTypeError): 

2158 # Re-importing the run1 datasets can be problematic if they 

2159 # use integer IDs so filter those out. 

2160 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2161 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 
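
To summarize, the call under test here, isolated as a sketch (names as constructed earlier in this method):

    transferred = self.target_butler.transfer_from(
        self.source_butler,
        source_refs,
        id_gen_map=id_gen_map,        # only meaningful for int -> UUID sources
        register_dataset_types=True,  # create missing dataset types first
    )
    assert len(transferred) == n_expected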

2162 

2163 

2164 if __name__ == "__main__":   # coverage note: 2164 ↛ 2165, line 2164 didn't jump to line 2165 because the condition on line 2164 was never true

2165 unittest.main()