# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
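    """Return a MetricsExample filled with fixed example values."""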

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
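        """Test that a search path can override values in the default
        butler configuration."""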

        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
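        """Check that all named components of the reference object can be
        retrieved, both directly and via a deferred dataset handle."""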

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
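        """Create a butler for the given run, register the requested
        dataset type, and insert the dimension records used by these tests."""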

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
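        """Put a dataset, retrieve it in several ways, and exercise
        removal, parameters, and component access along the way."""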

        # New datasets will be added to a run collection, and we will look
        # in that same run when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with the resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and getDeferred with a resolved ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # get() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.get(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the collections
            # are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed.
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion.
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self):
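        """Test that a butler with no default run or collection can put
        and get datasets when collections are passed explicitly."""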

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
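        """Test put/get of a storage class with no components."""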

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
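        """Test put/get of a composite that is stored as a single file
        (no disassembly)."""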

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
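        """Test put/get of a composite that the datastore may disassemble
        into per-component files."""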

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible but different dataset type
        # definition passed explicitly rather than via a resolved ref.
        # This should be consistent with the behavior above and return
        # the python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
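        """Test ingest of external files, both one dataset per file and
        multiple datasets sharing a single file."""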

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
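        """Test querying of dataset types, including components, and
        validation of the configuration against the registered types."""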

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
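        """Test that a failed transaction rolls back both registry and
        datastore changes."""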

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with the resolved ref
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
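        """Check that the butler string representation mentions the
        expected datastore and registry."""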

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence
        of the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
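        """Test that file templates produce the expected paths on put and
        that invalid templates are rejected."""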

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new temp
        directory repo. Does not assume a posix datastore."""
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
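        """Test removal of entire RUN collections, with and without
        unstoring the underlying artifacts."""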

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])

        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

        with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
            butler.registry.removeDatasetType(tuple(["test*", "test*"]))
        self.assertIn("not defined", "\n".join(cm.output))


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

1327 def testExportTransferCopy(self): 

1328 """Test local export using all transfer modes""" 

1329 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1330 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1331 # Test that the repo actually has at least one dataset. 

1332 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1333 self.assertGreater(len(datasets), 0) 

1334 uris = [exportButler.getURI(d) for d in datasets] 

1335 datastoreRoot = exportButler.datastore.root 

1336 

1337 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1338 

1339 for path in pathsInStore: 

1340 # Assume local file system 

1341 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1342 

1343 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1344 with safeTestTempDir(TESTDIR) as exportDir: 

1345 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1346 export.saveDatasets(datasets) 

1347 for path in pathsInStore: 

1348 self.assertTrue( 

1349 self.checkFileExists(exportDir, path), 

1350 f"Check that mode {transfer} exported files", 

1351 ) 

1352 
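# Illustrative sketch (not executed): an export with a transfer mode writes a
# YAML description and copies (or links) the artifacts into the directory:
#
#     with exportButler.export(directory=exportDir, format="yaml",
#                              transfer="copy") as export:
#         export.saveDatasets(datasets)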

1353 def testPruneDatasets(self): 

1354 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1355 butler = Butler(self.tmpConfigFile, writeable=True) 

1356 # Load registry data with dimensions to hang datasets off of. 

1357 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1358 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1359 # Add some RUN-type collections. 

1360 run1 = "run1" 

1361 butler.registry.registerRun(run1) 

1362 run2 = "run2" 

1363 butler.registry.registerRun(run2) 

1364 # Put some datasets. ref1 and ref2 have the same data ID, and are in

1365 # different runs. ref3 has a different data ID.

1366 metric = makeExampleMetrics() 

1367 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1368 datasetType = self.addDatasetType( 

1369 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1370 ) 

1371 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1372 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1373 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1374 

1375 # Simple prune. 

1376 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1377 with self.assertRaises(LookupError): 

1378 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1379 

1380 # Put data back. 

1381 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1382 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1383 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1384 

1385 # Check that in normal (non-trust) mode, deleting the datastore record

1386 # means that trashing and emptying the trash will not touch the file.

1387 uri1 = butler.datastore.getURI(ref1) 

1388 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1389 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1390 butler.datastore.trash(ref1) 

1391 butler.datastore.emptyTrash() 

1392 self.assertTrue(uri1.exists()) 

1393 uri1.remove() # Clean it up. 

1394 

1395 # Simulate execution butler setup by deleting the datastore 

1396 # record but keeping the file around and trusting. 

1397 butler.datastore.trustGetRequest = True 

1398 uri2 = butler.datastore.getURI(ref2) 

1399 uri3 = butler.datastore.getURI(ref3) 

1400 self.assertTrue(uri2.exists()) 

1401 self.assertTrue(uri3.exists()) 

1402 

1403 # Remove the datastore record. 

1404 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1405 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1406 self.assertTrue(uri2.exists()) 

1407 butler.datastore.trash([ref2, ref3]) 

1408 # The ref2 file is removed immediately because its datastore record is gone.

1409 self.assertFalse(uri2.exists()) 

1410 # But ref3 is not removed until the trash is emptied.

1411 self.assertTrue(uri3.exists()) 

1412 butler.datastore.emptyTrash() 

1413 self.assertFalse(uri3.exists()) 

1414 

1415 # Clear out the datasets from registry. 

1416 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1417 
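# Illustrative sketch (not executed): in the normal (non-trust) configuration
# file removal is two-phase, so nothing is deleted until the trash is emptied:
#
#     butler.datastore.trash(refs)    # datastore records moved to trash
#     butler.datastore.emptyTrash()   # file artifacts actually deleted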

1418 def testPytypeCoercion(self): 

1419 """Test python type coercion on Butler.get and put.""" 

1420 

1421 # Store some data with the normal example storage class. 

1422 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1423 datasetTypeName = "test_metric" 

1424 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1425 

1426 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1427 metric = butler.get(datasetTypeName, dataId=dataId) 

1428 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1429 

1430 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1431 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1432 

1433 # Now need to hack the registry dataset type definition. 

1434 # There is no API for this. 
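# Note on the internal Database.update call below (an assumption based on
# its docstring): the ``where`` dict maps a column name to the key that
# holds its value in the row dict, so the row repeats the dataset type name
# under its own key for the WHERE clause and only storage_class is updated.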

1435 manager = butler.registry._managers.datasets 

1436 manager._db.update( 

1437 manager._static.dataset_type, 

1438 {"name": datasetTypeName}, 

1439 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1440 ) 

1441 

1442 # Force reset of dataset type cache 

1443 butler.registry.refresh() 

1444 

1445 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1446 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1447 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1448 

1449 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1450 self.assertNotEqual(type(metric_model), type(metric)) 

1451 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1452 

1453 # Put the model and read it back to show that everything now 

1454 # works as normal. 

1455 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1456 metric_model_new = butler.get(metric_ref) 

1457 self.assertEqual(metric_model_new, metric_model) 

1458 

1459 # Hack the storage class again to one for which the get will fail

1460 # because no conversion is possible.

1461 manager._db.update( 

1462 manager._static.dataset_type, 

1463 {"name": datasetTypeName}, 

1464 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1465 ) 

1466 butler.registry.refresh() 

1467 

1468 with self.assertRaises(ValueError): 

1469 butler.get(datasetTypeName, dataId=dataId) 

1470 
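# Illustrative sketch (not executed): get() returns the python type of the
# registry's current storage class definition, converting from the stored
# type when the storage classes are compatible and raising when they are not:
#
#     metric = butler.get(datasetTypeName, dataId=dataId)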

1471 

1472@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1473class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1474 """PosixDatastore specialization of a butler using Postgres""" 

1475 

1476 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1477 fullConfigKey = ".datastore.formatters" 

1478 validationCanFail = True 

1479 datastoreStr = ["/tmp"] 

1480 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1481 registryStr = "PostgreSQL@test" 

1482 

1483 @staticmethod 

1484 def _handler(postgresql): 

1485 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1486 with engine.begin() as connection: 
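# The btree_gist extension is assumed here to be required by the registry's
# timespan exclusion constraints under PostgreSQL.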

1487 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1488 

1489 @classmethod 

1490 def setUpClass(cls): 

1491 # Create the postgres test server. 

1492 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1493 cache_initialized_db=True, on_initialized=cls._handler 

1494 ) 

1495 super().setUpClass() 

1496 

1497 @classmethod 

1498 def tearDownClass(cls): 

1499 # Clean up any lingering SQLAlchemy engines/connections 

1500 # so they're closed before we shut down the server. 

1501 gc.collect() 

1502 cls.postgresql.clear_cache() 

1503 super().tearDownClass() 

1504 

1505 def setUp(self): 

1506 self.server = self.postgresql() 

1507 

1508 # Need to add a registry section to the config. 

1509 self._temp_config = False 

1510 config = Config(self.configFile) 

1511 config["registry", "db"] = self.server.url() 

1512 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1513 config.dump(fh) 

1514 self.configFile = fh.name 

1515 self._temp_config = True 

1516 super().setUp() 

1517 

1518 def tearDown(self): 

1519 self.server.stop() 

1520 if self._temp_config and os.path.exists(self.configFile): 

1521 os.remove(self.configFile) 

1522 super().tearDown() 

1523 

1524 def testMakeRepo(self): 

1525 # The base class test assumes that it's using sqlite and assumes 

1526 # the config file is acceptable to sqlite. 

1527 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1528 

1529 

1530class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1531 """InMemoryDatastore specialization of a butler""" 

1532 

1533 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1534 fullConfigKey = None 

1535 useTempRoot = False 

1536 validationCanFail = False 

1537 datastoreStr = ["datastore='InMemory"] 

1538 datastoreName = ["InMemoryDatastore@"] 

1539 registryStr = "/gen3.sqlite3" 

1540 

1541 def testIngest(self): 
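# Ingest requires file artifacts, so there is nothing to test for an
# in-memory datastore.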

1542 pass 

1543 

1544 

1545class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1546 """PosixDatastore specialization""" 

1547 

1548 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1549 fullConfigKey = ".datastore.datastores.1.formatters" 

1550 validationCanFail = True 

1551 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1552 datastoreName = [ 

1553 "InMemoryDatastore@", 

1554 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1555 "SecondDatastore", 

1556 ] 

1557 registryStr = "/gen3.sqlite3" 

1558 
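# Illustrative sketch (not executed): in a chained configuration the child
# datastores are listed under ``datastore.datastores`` and per-child settings
# are addressed by position, matching fullConfigKey above:
#
#     config = ButlerConfig(os.path.join(TESTDIR, "config/basic/butler-chained.yaml"))
#     formatters = config[".datastore.datastores.1.formatters"]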

1559 

1560class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1561 """Test that a yaml file in one location can refer to a root in another.""" 

1562 

1563 datastoreStr = ["dir1"] 

1564 # Disable the makeRepo test since we are deliberately not using 

1565 # butler.yaml as the config name. 

1566 fullConfigKey = None 

1567 

1568 def setUp(self): 

1569 self.root = makeTestTempDir(TESTDIR) 

1570 

1571 # Make a new repository in one place 

1572 self.dir1 = os.path.join(self.root, "dir1") 

1573 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1574 

1575 # Move the yaml file to a different place and add a "root" 

1576 self.dir2 = os.path.join(self.root, "dir2") 

1577 os.makedirs(self.dir2, exist_ok=True) 

1578 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1579 config = Config(configFile1) 

1580 config["root"] = self.dir1 

1581 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1582 config.dumpToUri(configFile2) 

1583 os.remove(configFile1) 

1584 self.tmpConfigFile = configFile2 

1585 

1586 def testFileLocations(self): 

1587 self.assertNotEqual(self.dir1, self.dir2) 

1588 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1589 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1590 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1591 

1592 

1593class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1594 """Test that a config file created by makeRepo outside of repo works.""" 

1595 

1596 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1597 

1598 def setUp(self): 

1599 self.root = makeTestTempDir(TESTDIR) 

1600 self.root2 = makeTestTempDir(TESTDIR) 

1601 

1602 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1603 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1604 

1605 def tearDown(self): 

1606 if os.path.exists(self.root2): 

1607 shutil.rmtree(self.root2, ignore_errors=True) 

1608 super().tearDown() 

1609 

1610 def testConfigExistence(self): 

1611 c = Config(self.tmpConfigFile) 

1612 uri_config = ResourcePath(c["root"]) 

1613 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1614 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1615 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1616 

1617 def testPutGet(self): 

1618 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1619 self.runPutGetTest(storageClass, "test_metric") 

1620 
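# Illustrative sketch (not executed): ``outfile`` writes the repository config
# somewhere other than <root>/butler.yaml, and the saved config records the
# repository root so a butler can still be constructed from it:
#
#     Butler.makeRepo(root, config=Config(configFile), outfile="other.yaml")
#     butler = Butler("other.yaml")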

1621 

1622class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1623 """Test that a config file created by makeRepo outside of repo works.""" 

1624 

1625 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1626 

1627 def setUp(self): 

1628 self.root = makeTestTempDir(TESTDIR) 

1629 self.root2 = makeTestTempDir(TESTDIR) 

1630 

1631 self.tmpConfigFile = self.root2 

1632 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1633 

1634 def testConfigExistence(self): 

1635 # Append the yaml file name, otherwise the Config constructor does not

1636 # know the file type.

1637 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1638 super().testConfigExistence() 

1639 

1640 

1641class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1642 """Test that a config file created by makeRepo outside of repo works.""" 

1643 

1644 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1645 

1646 def setUp(self): 

1647 self.root = makeTestTempDir(TESTDIR) 

1648 self.root2 = makeTestTempDir(TESTDIR) 

1649 

1650 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1651 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1652 

1653 

1654@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1655class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1656 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1657 a local in-memory SqlRegistry. 

1658 """ 

1659 

1660 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1661 fullConfigKey = None 

1662 validationCanFail = True 

1663 

1664 bucketName = "anybucketname" 

1665 """Name of the Bucket that will be used in the tests. The name is read from 

1666 the config file used with the tests during set-up. 

1667 """ 

1668 

1669 root = "butlerRoot/" 

1670 """Root repository directory expected to be used in case useTempRoot=False. 

1671 Otherwise the root is set to a randomly generated 20-character string

1672 during set-up. 

1673 """ 

1674 

1675 datastoreStr = [f"datastore={root}"] 

1676 """Contains all expected root locations in a format expected to be 

1677 returned by Butler stringification. 

1678 """ 

1679 

1680 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1681 """The expected format of the S3 Datastore string.""" 

1682 

1683 registryStr = "/gen3.sqlite3" 

1684 """Expected format of the Registry string.""" 

1685 

1686 mock_s3 = mock_s3() 

1687 """The mocked s3 interface from moto.""" 

1688 

1689 def genRoot(self): 

1690 """Returns a random string of len 20 to serve as a root 

1691 name for the temporary bucket repo. 

1692 

1693 This is equivalent to tempfile.mkdtemp, since this is what self.root

1694 becomes when useTempRoot is True. 

1695 """ 

1696 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1697 return rndstr + "/" 

1698 

1699 def setUp(self): 

1700 config = Config(self.configFile) 

1701 uri = ResourcePath(config[".datastore.datastore.root"]) 

1702 self.bucketName = uri.netloc 

1703 

1704 # Enable S3 mocking of tests. 

1705 self.mock_s3.start() 

1706 

1707 # set up some fake credentials if they do not exist 

1708 self.usingDummyCredentials = setAwsEnvCredentials() 

1709 

1710 if self.useTempRoot: 

1711 self.root = self.genRoot() 

1712 rooturi = f"s3://{self.bucketName}/{self.root}" 

1713 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1714 

1715 # need local folder to store registry database 

1716 self.reg_dir = makeTestTempDir(TESTDIR) 

1717 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1718 

1719 # Moto needs to know that we expect the bucket to exist

1720 # (this used to be the class attribute bucketName) 

1721 s3 = boto3.resource("s3") 

1722 s3.create_bucket(Bucket=self.bucketName) 

1723 

1724 self.datastoreStr = f"datastore={self.root}" 

1725 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1726 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1727 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1728 

1729 def tearDown(self): 

1730 s3 = boto3.resource("s3") 

1731 bucket = s3.Bucket(self.bucketName) 

1732 try: 

1733 bucket.objects.all().delete() 

1734 except botocore.exceptions.ClientError as e: 

1735 if e.response["Error"]["Code"] == "404": 

1736 # The key was not reachable; nothing to clean up.

1737 pass 

1738 else: 

1739 raise 

1740 

1741 bucket = s3.Bucket(self.bucketName) 

1742 bucket.delete() 

1743 

1744 # Stop the S3 mock. 

1745 self.mock_s3.stop() 

1746 

1747 # unset any potentially set dummy credentials 

1748 if self.usingDummyCredentials: 

1749 unsetAwsEnvCredentials() 

1750 

1751 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1752 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1753 

1754 if self.useTempRoot and os.path.exists(self.root): 

1755 shutil.rmtree(self.root, ignore_errors=True) 

1756 

1757 super().tearDown() 

1758 

1759 

1760class PosixDatastoreTransfers(unittest.TestCase): 

1761 """Test data transfers between butlers. 

1762 

1763 Tests for different managers. Only UUID-to-UUID transfers are exercised

1764 here. UUID to integer is not supported since we do not currently

1765 want to allow that. Integer to UUID would be supported with the caveat

1766 that UUID4 would be generated, which would be incorrect for raw

1767 dataset types.

1768 """ 

1769 

1770 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1771 

1772 @classmethod 

1773 def setUpClass(cls): 

1774 cls.storageClassFactory = StorageClassFactory() 

1775 cls.storageClassFactory.addFromConfig(cls.configFile) 

1776 

1777 def setUp(self): 

1778 self.root = makeTestTempDir(TESTDIR) 

1779 self.config = Config(self.configFile) 

1780 

1781 def tearDown(self): 

1782 removeTestTempDir(self.root) 

1783 

1784 def create_butler(self, manager, label): 

1785 config = Config(self.configFile) 

1786 config["registry", "managers", "datasets"] = manager 

1787 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1788 

1789 def create_butlers(self, manager1, manager2): 

1790 self.source_butler = self.create_butler(manager1, "1") 

1791 self.target_butler = self.create_butler(manager2, "2") 

1792 

1793 def testTransferUuidToUuid(self): 

1794 self.create_butlers( 

1795 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1796 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1797 ) 

1798 # Setting id_gen_map should have no effect here 

1799 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1800 

1801 def _enable_trust(self, datastore) -> None: 

1802 if hasattr(datastore, "trustGetRequest"): 

1803 datastore.trustGetRequest = True 

1804 elif hasattr(datastore, "datastores"): 

1805 for datastore in datastore.datastores: 

1806 if hasattr(datastore, "trustGetRequest"): 

1807 datastore.trustGetRequest = True 

1808 

1809 def testTransferMissing(self): 

1810 """Test transfers where datastore records are missing. 

1811 

1812 This is how execution butler works. 

1813 """ 

1814 self.create_butlers( 

1815 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1816 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1817 ) 

1818 

1819 # Configure the source butler to allow trust. 

1820 self._enable_trust(self.source_butler.datastore) 

1821 

1822 self.assertButlerTransfers(purge=True) 

1823 

1824 def testTransferMissingDisassembly(self): 

1825 """Test transfers where datastore records are missing. 

1826 

1827 This is how execution butler works. 

1828 """ 

1829 self.create_butlers( 

1830 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1831 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1832 ) 

1833 

1834 # Configure the source butler to allow trust. 

1835 self._enable_trust(self.source_butler.datastore) 

1836 

1837 # Test disassembly. 

1838 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1839 

1840 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1841 """Test that a run can be transferred to another butler.""" 

1842 

1843 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1844 datasetTypeName = "random_data" 

1845 

1846 # Test will create 3 collections and we will want to transfer 

1847 # two of those three. 

1848 runs = ["run1", "run2", "other"] 

1849 

1850 # Also want to use two different dataset types to ensure that 

1851 # grouping works. 

1852 datasetTypeNames = ["random_data", "random_data_2"] 

1853 

1854 # Create the run collections in the source butler. 

1855 for run in runs: 

1856 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1857 

1858 # Create dimensions in source butler. 

1859 n_exposures = 30 

1860 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1861 self.source_butler.registry.insertDimensionData( 

1862 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1863 ) 

1864 self.source_butler.registry.insertDimensionData( 

1865 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1866 ) 

1867 

1868 for i in range(n_exposures): 

1869 self.source_butler.registry.insertDimensionData( 

1870 "exposure", 

1871 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1872 ) 

1873 

1874 # Create dataset types in the source butler. 

1875 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1876 for datasetTypeName in datasetTypeNames: 

1877 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1878 self.source_butler.registry.registerDatasetType(datasetType) 

1879 

1880 # Write a dataset to an unrelated run -- this will ensure that 

1881 # we are rewriting integer dataset ids in the target if necessary. 

1882 # Will not be relevant for UUID. 

1883 run = "distraction" 

1884 butler = Butler(butler=self.source_butler, run=run) 

1885 butler.put( 

1886 makeExampleMetrics(), 

1887 datasetTypeName, 

1888 exposure=1, 

1889 instrument="DummyCamComp", 

1890 physical_filter="d-r", 

1891 ) 

1892 

1893 # Write some example metrics to the source 

1894 butler = Butler(butler=self.source_butler) 

1895 

1896 # Set of DatasetRefs that should be in the list of refs to transfer 

1897 # but which will not be transferred. 

1898 deleted = set() 

1899 

1900 n_expected = 20 # Number of datasets expected to be transferred 

1901 source_refs = [] 

1902 for i in range(n_exposures): 

1903 # Put a third of the datasets into each collection; only retain

1904 # two-thirds.

1905 index = i % 3 

1906 run = runs[index] 

1907 datasetTypeName = datasetTypeNames[i % 2] 

1908 

1909 metric_data = { 

1910 "summary": {"counter": i}, 

1911 "output": {"text": "metric"}, 

1912 "data": [2 * x for x in range(i)], 

1913 } 

1914 metric = MetricsExample(**metric_data) 

1915 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1916 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

1917 

1918 # Remove the datastore record using low-level API 

1919 if purge: 

1920 # Remove records for a fraction. 

1921 if index == 1: 

1922 # For one of these delete the file as well. 

1923 # This allows the "missing" code to filter the 

1924 # file out. 

1925 # Access the individual datastores. 

1926 datastores = [] 

1927 if hasattr(butler.datastore, "datastores"): 

1928 datastores.extend(butler.datastore.datastores) 

1929 else: 

1930 datastores.append(butler.datastore) 

1931 

1932 if not deleted: 

1933 # For a chained datastore we need to remove 

1934 # files in each chain. 

1935 for datastore in datastores: 

1936 # The file might not be known to the datastore 

1937 # if constraints are used. 

1938 try: 

1939 primary, uris = datastore.getURIs(ref) 

1940 except FileNotFoundError: 

1941 continue 

1942 if primary: 

1943 if primary.scheme != "mem": 

1944 primary.remove() 

1945 for uri in uris.values(): 

1946 if uri.scheme != "mem": 

1947 uri.remove() 

1948 n_expected -= 1 

1949 deleted.add(ref) 

1950 

1951 # Remove the datastore record. 

1952 for datastore in datastores: 

1953 if hasattr(datastore, "removeStoredItemInfo"): 

1954 datastore.removeStoredItemInfo(ref) 

1955 

1956 if index < 2: 

1957 source_refs.append(ref) 

1958 if ref not in deleted: 

1959 new_metric = butler.get(ref.unresolved(), collections=run) 

1960 self.assertEqual(new_metric, metric) 

1961 

1962 # Create some bad dataset types to ensure we check for inconsistent 

1963 # definitions. 

1964 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

1965 for datasetTypeName in datasetTypeNames: 

1966 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

1967 self.target_butler.registry.registerDatasetType(datasetType) 

1968 with self.assertRaises(ConflictingDefinitionError) as cm: 

1969 self.target_butler.transfer_from(self.source_butler, source_refs) 

1970 self.assertIn("dataset type differs", str(cm.exception)) 

1971 

1972 # And remove the bad definitions. 

1973 for datasetTypeName in datasetTypeNames: 

1974 self.target_butler.registry.removeDatasetType(datasetTypeName) 

1975 

1976 # Transfer without creating dataset types should fail. 

1977 with self.assertRaises(KeyError): 

1978 self.target_butler.transfer_from(self.source_butler, source_refs) 

1979 

1980 # Transfer without creating dimensions should fail. 

1981 with self.assertRaises(ConflictingDefinitionError) as cm: 

1982 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

1983 self.assertIn("dimension", str(cm.exception)) 

1984 

1985 # The failed transfer above leaves registry in an inconsistent 

1986 # state because the run is created but then rolled back without 

1987 # the collection cache being cleared. For now force a refresh. 

1988 # Can remove with DM-35498. 

1989 self.target_butler.registry.refresh() 

1990 

1991 # Now transfer them to the second butler, including dimensions. 

1992 with self.assertLogs(level=logging.DEBUG) as cm: 

1993 transferred = self.target_butler.transfer_from( 

1994 self.source_butler, 

1995 source_refs, 

1996 register_dataset_types=True, 

1997 transfer_dimensions=True, 

1998 ) 

1999 self.assertEqual(len(transferred), n_expected) 

2000 log_output = ";".join(cm.output) 

2001 

2002 # A ChainedDatastore will use the in-memory datastore for mexists

2003 # so we cannot rely on the mexists log message.

2004 self.assertIn("Number of datastore records found in source", log_output) 

2005 self.assertIn("Creating output run", log_output) 

2006 

2007 # Do the transfer twice to ensure that it will do nothing extra. 

2008 # Only do this if purge=True because it does not work for int 

2009 # dataset_id. 

2010 if purge: 

2011 # This should not need to register dataset types. 

2012 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2013 self.assertEqual(len(transferred), n_expected) 

2014 

2015 # Also do an explicit low-level transfer to trigger some 

2016 # edge cases. 

2017 with self.assertLogs(level=logging.DEBUG) as cm: 

2018 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2019 log_output = ";".join(cm.output) 

2020 self.assertIn("no file artifacts exist", log_output) 

2021 

2022 with self.assertRaises((TypeError, AttributeError)): 

2023 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2024 

2025 with self.assertRaises(ValueError): 

2026 self.target_butler.datastore.transfer_from( 

2027 self.source_butler.datastore, source_refs, transfer="split" 

2028 ) 

2029 

2030 # Now try to get the same refs from the new butler. 

2031 for ref in source_refs: 

2032 if ref not in deleted: 

2033 unresolved_ref = ref.unresolved() 

2034 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2035 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2036 self.assertEqual(new_metric, old_metric) 

2037 

2038 # Now prune run2 collection and create instead a CHAINED collection. 

2039 # This should block the transfer. 

2040 self.target_butler.removeRuns(["run2"], unstore=True) 

2041 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2042 with self.assertRaises(CollectionTypeError): 

2043 # Re-importing the run1 datasets can be problematic if they 

2044 # use integer IDs so filter those out. 

2045 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2046 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2047 
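# Illustrative sketch (not executed): the high-level transfer exercised by
# this method:
#
#     transferred = target_butler.transfer_from(
#         source_butler,
#         source_refs,
#         register_dataset_types=True,   # create missing dataset types
#         transfer_dimensions=True,      # copy required dimension records
#     )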

2048 

2049class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2050 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2051 

2052 

2053if __name__ == "__main__": 

2054 unittest.main()