# tests/test_butler.py (coverage.py v6.5.0 report, 2023-02-28: 13% of 1071 statements covered)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
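    # The three positional arguments become the summary, output, and data
    # attributes that the component and slice-parameter tests below read
    # back.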
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

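        # A directory passed via searchPaths is consulted ahead of the
        # defaults, so values from config/testConfigs should override the
        # basic config below.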
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

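                    # preserve_path=True keeps the datastore-relative
                    # subdirectory structure under the destination, while
                    # False flattens every artifact into the top level; the
                    # separator-count assertions below rely on this.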
                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
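        # The "slice" read parameter only affects the data component;
        # summary and output are expected to come back unchanged.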
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
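        # The repository index written below is a plain label -> URI mapping,
        # e.g.:
        #
        #     label: /path/to/repo/butler.yaml
        #     bad_label: s3://bucket/not_real.yaml
        #
        # (paths are illustrative; the test uses its own temp locations).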
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
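        # A single YAML file backs both detector refs here; the
        # MultiDetectorFormatter is expected to select the right entry from
        # the shared file on read, which is why both URIs compare equal
        # below.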
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

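        # Component dataset types have no registry entries of their own, so
        # expand each parent type into its component types before comparing
        # names.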
        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
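        # Everything inside the transaction context below (the dimension
        # inserts and the put) should be rolled back when TransactionTestError
        # propagates out.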
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
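        # standalone=True writes a fully expanded configuration into the new
        # repo, so "full" below should carry keys that "limited" only
        # inherits from the defaults.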
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
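        # The data IDs used for the puts below never name "exposure"
        # directly; the butler is expected to resolve seq_num + day_obs into
        # the exposure ID via the dimension records inserted above.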
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
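        # A trailing ":?" marks a template field as optional: a missing
        # record attribute should be logged and dropped from the formatted
        # path, whereas the same typo without ":?" raises KeyError below.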
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Test exporting to a temp directory and importing back into a new
        temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
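        # unstore=True should also delete the underlying artifact, while
        # unstore=False only forgets the run in registry and leaves the file
        # in place; the URI checks at the end verify both behaviors.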
1270 butler.removeRuns([run1], unstore=True) 

1271 butler.removeRuns([run2], unstore=False) 

1272 # Should be nothing in registry for either one, and datastore should 

1273 # not think either exists. 

1274 with self.assertRaises(MissingCollectionError): 

1275 butler.registry.getCollectionType(run1) 

1276 with self.assertRaises(MissingCollectionError): 

1277 butler.registry.getCollectionType(run2) 

1278 self.assertFalse(butler.datastore.exists(ref1)) 

1279 self.assertFalse(butler.datastore.exists(ref2)) 

1280 # The ref we unstored should be gone according to the URI, but the 

1281 # one we forgot should still be around. 

1282 self.assertFalse(uri1.exists()) 

1283 self.assertTrue(uri2.exists()) 

1284 

1285 

1286class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1287 """PosixDatastore specialization of a butler""" 

1288 

1289 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1290 fullConfigKey = ".datastore.formatters" 

1291 validationCanFail = True 

1292 datastoreStr = ["/tmp"] 

1293 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1294 registryStr = "/gen3.sqlite3" 

1295 

1296 def testPathConstructor(self): 

1297 """Independent test of constructor using PathLike.""" 

1298 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1299 self.assertIsInstance(butler, Butler) 

1300 

1301 # And again with a Path object with the butler yaml 

1302 path = pathlib.Path(self.tmpConfigFile) 

1303 butler = Butler(path, writeable=False) 

1304 self.assertIsInstance(butler, Butler) 

1305 

1306 # And again with a Path object without the butler yaml 

1307 # (making sure we skip it if the tmp config doesn't end 

1308 # in butler.yaml -- which is the case for a subclass) 

1309 if self.tmpConfigFile.endswith("butler.yaml"): 

1310 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1311 butler = Butler(path, writeable=False) 

1312 self.assertIsInstance(butler, Butler) 

1313 

1314 def testExportTransferCopy(self): 

1315 """Test local export using all transfer modes""" 

1316 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1317 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1318 # Test that the repo actually has at least one dataset. 

1319 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1320 self.assertGreater(len(datasets), 0) 

1321 uris = [exportButler.getURI(d) for d in datasets] 

1322 datastoreRoot = exportButler.datastore.root 

1323 

1324 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1325 

1326 for path in pathsInStore: 

1327 # Assume local file system 

1328 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1329 

1330 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1331 with safeTestTempDir(TESTDIR) as exportDir: 

1332 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1333 export.saveDatasets(datasets) 

1334 for path in pathsInStore: 

1335 self.assertTrue( 

1336 self.checkFileExists(exportDir, path), 

1337 f"Check that mode {transfer} exported files", 

1338 ) 

1339 
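    # Illustrative sketch, not part of the original test case: the minimal
    # export call pattern exercised by testExportTransferCopy above. The
    # method name and arguments are hypothetical.
    def _demo_export_sketch(self, butler, datasets, export_dir):
        # Copy the artifacts into export_dir and write a YAML index
        # describing the exported datasets.
        with butler.export(directory=export_dir, format="yaml", transfer="copy") as export:
            export.saveDatasets(datasets)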

1340 def testPruneDatasets(self): 

1341 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1342 butler = Butler(self.tmpConfigFile, writeable=True) 

1343 # Load registry data with dimensions to hang datasets off of. 

1344 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1345 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1346 # Add some RUN-type collections. 

1347 run1 = "run1" 

1348 butler.registry.registerRun(run1) 

1349 run2 = "run2" 

1350 butler.registry.registerRun(run2) 

1351 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1352 # different runs. ref3 has a different data ID. 

1353 metric = makeExampleMetrics() 

1354 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1355 datasetType = self.addDatasetType( 

1356 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1357 ) 

1358 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1359 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1360 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1361 

1362 # Simple prune. 

1363 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1364 with self.assertRaises(LookupError): 

1365 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1366 

1367 # Put data back. 

1368 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1369 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1370 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1371 

1372 # Check that in normal mode, deleting the datastore record first 

1373 # means that trashing and emptying will not touch the file. 

1374 uri1 = butler.datastore.getURI(ref1) 

1375 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1376 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1377 butler.datastore.trash(ref1) 

1378 butler.datastore.emptyTrash() 

1379 self.assertTrue(uri1.exists()) 

1380 uri1.remove() # Clean it up. 

1381 

1382 # Simulate execution butler setup by deleting the datastore 

1383 # record but keeping the file around and enabling trust mode. 

1384 butler.datastore.trustGetRequest = True 

1385 uri2 = butler.datastore.getURI(ref2) 

1386 uri3 = butler.datastore.getURI(ref3) 

1387 self.assertTrue(uri2.exists()) 

1388 self.assertTrue(uri3.exists()) 

1389 

1390 # Remove the datastore record. 

1391 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1392 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1393 self.assertTrue(uri2.exists()) 

1394 butler.datastore.trash([ref2, ref3]) 

1395 # The ref2 file is removed immediately because its record is gone. 

1396 self.assertFalse(uri2.exists()) 

1397 # But ref3 has to wait for the trash to be emptied. 

1398 self.assertTrue(uri3.exists()) 

1399 butler.datastore.emptyTrash() 

1400 self.assertFalse(uri3.exists()) 

1401 

1402 # Clear out the datasets from registry. 

1403 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1404 

1405 def testPytypeCoercion(self): 

1406 """Test python type coercion on Butler.get and put.""" 

1407 

1408 # Store some data with the normal example storage class. 

1409 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1410 datasetTypeName = "test_metric" 

1411 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1412 

1413 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1414 metric = butler.get(datasetTypeName, dataId=dataId) 

1415 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1416 

1417 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1418 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1419 

1420 # Now need to hack the registry dataset type definition. 

1421 # There is no API for this. 

1422 manager = butler.registry._managers.datasets 

1423 manager._db.update( 

1424 manager._static.dataset_type, 

1425 {"name": datasetTypeName}, 

1426 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1427 ) 

1428 

1429 # Force reset of dataset type cache 

1430 butler.registry.refresh() 

1431 

1432 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1433 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1434 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1435 

1436 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1437 self.assertNotEqual(type(metric_model), type(metric)) 

1438 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1439 

1440 # Put the model and read it back to show that everything now 

1441 # works as normal. 

1442 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1443 metric_model_new = butler.get(metric_ref) 

1444 self.assertEqual(metric_model_new, metric_model) 

1445 

1446 # Hack the storage class again, this time to one that the get 

1447 # cannot convert to, so it will fail. 

1448 manager._db.update( 

1449 manager._static.dataset_type, 

1450 {"name": datasetTypeName}, 

1451 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1452 ) 

1453 butler.registry.refresh() 

1454 

1455 with self.assertRaises(ValueError): 

1456 butler.get(datasetTypeName, dataId=dataId) 

1457 

1458 
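# Illustrative sketch, not part of the covered file: the trash behaviour
# asserted in testPruneDatasets above. The helper name is hypothetical;
# trustGetRequest, trash, and emptyTrash are the APIs used in the test.
def demo_trash_semantics(butler: Butler, ref: DatasetRef) -> None:
    # With trust enabled, a ref whose datastore record has already been
    # deleted has its file removed as soon as it is trashed; refs that
    # still have records keep their files until the trash is emptied.
    butler.datastore.trustGetRequest = True
    butler.datastore.trash([ref])
    butler.datastore.emptyTrash()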

1459@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1460class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1461 """PosixDatastore specialization of a butler using Postgres""" 

1462 

1463 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1464 fullConfigKey = ".datastore.formatters" 

1465 validationCanFail = True 

1466 datastoreStr = ["/tmp"] 

1467 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1468 registryStr = "PostgreSQL@test" 

1469 

1470 @staticmethod 

1471 def _handler(postgresql): 

1472 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1473 with engine.begin() as connection: 

1474 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1475 

1476 @classmethod 

1477 def setUpClass(cls): 

1478 # Create the postgres test server. 

1479 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1480 cache_initialized_db=True, on_initialized=cls._handler 

1481 ) 

1482 super().setUpClass() 

1483 

1484 @classmethod 

1485 def tearDownClass(cls): 

1486 # Clean up any lingering SQLAlchemy engines/connections 

1487 # so they're closed before we shut down the server. 

1488 gc.collect() 

1489 cls.postgresql.clear_cache() 

1490 super().tearDownClass() 

1491 

1492 def setUp(self): 

1493 self.server = self.postgresql() 

1494 

1495 # Need to add a registry section to the config. 

1496 self._temp_config = False 

1497 config = Config(self.configFile) 

1498 config["registry", "db"] = self.server.url() 

1499 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1500 config.dump(fh) 

1501 self.configFile = fh.name 

1502 self._temp_config = True 

1503 super().setUp() 

1504 

1505 def tearDown(self): 

1506 self.server.stop() 

1507 if self._temp_config and os.path.exists(self.configFile): 

1508 os.remove(self.configFile) 

1509 super().tearDown() 

1510 

1511 def testMakeRepo(self): 

1512 # The base class test assumes sqlite and a config file that 

1513 # sqlite will accept. 

1514 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1515 

1516 
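# Illustrative sketch, not part of the covered file: how setUp() above
# retargets the registry at a server-provided database before the butler
# is created. The helper name and example URL are assumptions.
def demo_registry_db_override(config_file: str, db_url: str) -> Config:
    config = Config(config_file)
    # Any SQLAlchemy URL works here, e.g. "postgresql://user@host/test".
    config["registry", "db"] = db_url
    return config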

1517class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1518 """InMemoryDatastore specialization of a butler""" 

1519 

1520 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1521 fullConfigKey = None 

1522 useTempRoot = False 

1523 validationCanFail = False 

1524 datastoreStr = ["datastore='InMemory"] 

1525 datastoreName = ["InMemoryDatastore@"] 

1526 registryStr = "/gen3.sqlite3" 

1527 

1528 def testIngest(self): 

1529 pass 

1530 

1531 

1532class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1533 """PosixDatastore specialization""" 

1534 

1535 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1536 fullConfigKey = ".datastore.datastores.1.formatters" 

1537 validationCanFail = True 

1538 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1539 datastoreName = [ 

1540 "InMemoryDatastore@", 

1541 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1542 "SecondDatastore", 

1543 ] 

1544 registryStr = "/gen3.sqlite3" 

1545 

1546 

1547class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1548 """Test that a yaml file in one location can refer to a root in another.""" 

1549 

1550 datastoreStr = ["dir1"] 

1551 # Disable the makeRepo test since we are deliberately not using 

1552 # butler.yaml as the config name. 

1553 fullConfigKey = None 

1554 

1555 def setUp(self): 

1556 self.root = makeTestTempDir(TESTDIR) 

1557 

1558 # Make a new repository in one place 

1559 self.dir1 = os.path.join(self.root, "dir1") 

1560 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1561 

1562 # Move the yaml file to a different place and add a "root" 

1563 self.dir2 = os.path.join(self.root, "dir2") 

1564 os.makedirs(self.dir2, exist_ok=True) 

1565 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1566 config = Config(configFile1) 

1567 config["root"] = self.dir1 

1568 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1569 config.dumpToUri(configFile2) 

1570 os.remove(configFile1) 

1571 self.tmpConfigFile = configFile2 

1572 

1573 def testFileLocations(self): 

1574 self.assertNotEqual(self.dir1, self.dir2) 

1575 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1576 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1577 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1578 

1579 
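# Illustrative sketch, not part of the covered file: relocating a butler
# config while pointing "root" back at the original repo directory, as in
# ButlerExplicitRootTestCase.setUp() above. The helper name and paths are
# assumptions.
def demo_relocate_config(repo_dir: str, config_dir: str) -> str:
    config = Config(os.path.join(repo_dir, "butler.yaml"))
    config["root"] = repo_dir  # record the datastore root explicitly
    relocated = os.path.join(config_dir, "butler2.yaml")
    config.dumpToUri(relocated)
    return relocated  # a Butler can now be constructed from this file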

1580class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1581 """Test that a config file created by makeRepo outside of repo works.""" 

1582 

1583 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1584 

1585 def setUp(self): 

1586 self.root = makeTestTempDir(TESTDIR) 

1587 self.root2 = makeTestTempDir(TESTDIR) 

1588 

1589 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1590 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1591 

1592 def tearDown(self): 

1593 if os.path.exists(self.root2): 

1594 shutil.rmtree(self.root2, ignore_errors=True) 

1595 super().tearDown() 

1596 

1597 def testConfigExistence(self): 

1598 c = Config(self.tmpConfigFile) 

1599 uri_config = ResourcePath(c["root"]) 

1600 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1601 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1602 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1603 

1604 def testPutGet(self): 

1605 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1606 self.runPutGetTest(storageClass, "test_metric") 

1607 

1608 

1609class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1610 """Test that a config file created by makeRepo outside of repo works.""" 

1611 

1612 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1613 

1614 def setUp(self): 

1615 self.root = makeTestTempDir(TESTDIR) 

1616 self.root2 = makeTestTempDir(TESTDIR) 

1617 

1618 self.tmpConfigFile = self.root2 

1619 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1620 

1621 def testConfigExistence(self): 

1622 # Append the yaml file name, since otherwise the Config constructor 

1623 # cannot determine the file type. 

1624 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1625 super().testConfigExistence() 

1626 

1627 

1628class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1629 """Test that a config file created by makeRepo outside of repo works.""" 

1630 

1631 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1632 

1633 def setUp(self): 

1634 self.root = makeTestTempDir(TESTDIR) 

1635 self.root2 = makeTestTempDir(TESTDIR) 

1636 

1637 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1638 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1639 

1640 
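# Illustrative sketch, not part of the covered file: the pattern shared by
# the three ButlerMakeRepoOutfile* cases above -- create the repo in one
# place while writing its config somewhere else. The helper name and paths
# are assumptions.
def demo_make_repo_outfile(repo_root: str, out_config: str) -> Butler:
    # makeRepo writes the repo at repo_root but places the config file at
    # out_config; the config records the repo root, so it is usable as-is.
    Butler.makeRepo(repo_root, outfile=out_config)
    return Butler(out_config, writeable=False)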

1641@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1642class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1643 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1644 a local in-memory SqlRegistry. 

1645 """ 

1646 

1647 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1648 fullConfigKey = None 

1649 validationCanFail = True 

1650 

1651 bucketName = "anybucketname" 

1652 """Name of the Bucket that will be used in the tests. The name is read from 

1653 the config file used with the tests during set-up. 

1654 """ 

1655 

1656 root = "butlerRoot/" 

1657 """Root repository directory expected to be used in case useTempRoot=False. 

1658 Otherwise the root is set to a 20 characters long randomly generated string 

1659 during set-up. 

1660 """ 

1661 

1662 datastoreStr = [f"datastore={root}"] 

1663 """Contains all expected root locations in a format expected to be 

1664 returned by Butler stringification. 

1665 """ 

1666 

1667 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1668 """The expected format of the S3 Datastore string.""" 

1669 

1670 registryStr = "/gen3.sqlite3" 

1671 """Expected format of the Registry string.""" 

1672 

1673 mock_s3 = mock_s3() 

1674 """The mocked s3 interface from moto.""" 

1675 

1676 def genRoot(self): 

1677 """Returns a random string of len 20 to serve as a root 

1678 name for the temporary bucket repo. 

1679 

1680 This is equivalent to tempfile.mkdtemp as this is what self.root 

1681 becomes when useTempRoot is True. 

1682 """ 

1683 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1684 return rndstr + "/" 

1685 

1686 def setUp(self): 

1687 config = Config(self.configFile) 

1688 uri = ResourcePath(config[".datastore.datastore.root"]) 

1689 self.bucketName = uri.netloc 

1690 

1691 # Enable S3 mocking of tests. 

1692 self.mock_s3.start() 

1693 

1694 # Set up some fake credentials if they do not exist. 

1695 self.usingDummyCredentials = setAwsEnvCredentials() 

1696 

1697 if self.useTempRoot: 

1698 self.root = self.genRoot() 

1699 rooturi = f"s3://{self.bucketName}/{self.root}" 

1700 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1701 

1702 # Need a local folder to store the registry database. 

1703 self.reg_dir = makeTestTempDir(TESTDIR) 

1704 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1705 

1706 # Moto needs to know that we expect the bucket self.bucketName to 

1707 # exist (this used to be the class attribute bucketName). 

1708 s3 = boto3.resource("s3") 

1709 s3.create_bucket(Bucket=self.bucketName) 

1710 

1711 self.datastoreStr = f"datastore={self.root}" 

1712 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1713 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1714 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1715 

1716 def tearDown(self): 

1717 s3 = boto3.resource("s3") 

1718 bucket = s3.Bucket(self.bucketName) 

1719 try: 

1720 bucket.objects.all().delete() 

1721 except botocore.exceptions.ClientError as e: 

1722 if e.response["Error"]["Code"] == "404": 

1723 # The key was not reachable; nothing to delete. 

1724 pass 

1725 else: 

1726 raise 

1727 

1728 bucket = s3.Bucket(self.bucketName) 

1729 bucket.delete() 

1730 

1731 # Stop the S3 mock. 

1732 self.mock_s3.stop() 

1733 

1734 # unset any potentially set dummy credentials 

1735 if self.usingDummyCredentials: 

1736 unsetAwsEnvCredentials() 

1737 

1738 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1739 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1740 

1741 if self.useTempRoot and os.path.exists(self.root): 

1742 shutil.rmtree(self.root, ignore_errors=True) 

1743 

1744 super().tearDown() 

1745 

1746 
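# Illustrative sketch, not part of the covered file: the moto-based S3
# set-up used above -- create the mocked bucket and derive the repo root
# URI from it. Assumes boto3/moto are importable and the mock is active;
# the helper name and arguments are hypothetical.
def demo_s3_root(bucket_name: str, root: str) -> str:
    s3 = boto3.resource("s3")
    # Moto only serves buckets that have been explicitly created.
    s3.create_bucket(Bucket=bucket_name)
    return f"s3://{bucket_name}/{root}"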

1747class PosixDatastoreTransfers(unittest.TestCase): 

1748 """Test data transfers between butlers. 

1749 

1750 Tests for different dataset ID managers. UUID to UUID and integer to 

1751 integer are tested. UUID to integer is not supported since we do not 

1752 currently want to allow that. Integer to UUID is supported, with the 

1753 caveat that a UUID4 will be generated, which is incorrect for raw 

1754 dataset types; the tests ignore that. 

1755 """ 

1756 

1757 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1758 

1759 @classmethod 

1760 def setUpClass(cls): 

1761 cls.storageClassFactory = StorageClassFactory() 

1762 cls.storageClassFactory.addFromConfig(cls.configFile) 

1763 

1764 def setUp(self): 

1765 self.root = makeTestTempDir(TESTDIR) 

1766 self.config = Config(self.configFile) 

1767 

1768 def tearDown(self): 

1769 removeTestTempDir(self.root) 

1770 

1771 def create_butler(self, manager, label): 

1772 config = Config(self.configFile) 

1773 config["registry", "managers", "datasets"] = manager 

1774 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1775 

1776 def create_butlers(self, manager1, manager2): 

1777 self.source_butler = self.create_butler(manager1, "1") 

1778 self.target_butler = self.create_butler(manager2, "2") 

1779 

1780 def testTransferUuidToUuid(self): 

1781 self.create_butlers( 

1782 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1783 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1784 ) 

1785 # Setting id_gen_map should have no effect here 

1786 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1787 

1788 def testTransferMissing(self): 

1789 """Test transfers where datastore records are missing. 

1790 

1791 This is how execution butler works. 

1792 """ 

1793 self.create_butlers( 

1794 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1795 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1796 ) 

1797 

1798 # Configure the source butler to allow trust. 

1799 self.source_butler.datastore.trustGetRequest = True 

1800 

1801 self.assertButlerTransfers(purge=True) 

1802 

1803 def testTransferMissingDisassembly(self): 

1804 """Test transfers where datastore records are missing. 

1805 

1806 This is how execution butler works. 

1807 """ 

1808 self.create_butlers( 

1809 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1810 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1811 ) 

1812 

1813 # Configure the source butler to allow trust. 

1814 self.source_butler.datastore.trustGetRequest = True 

1815 

1816 # Test disassembly. 

1817 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1818 

1819 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1820 """Test that a run can be transferred to another butler.""" 

1821 

1822 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1823 datasetTypeName = "random_data" 

1824 

1825 # The test will create 3 collections, and we will want to 

1826 # transfer two of the three. 

1827 runs = ["run1", "run2", "other"] 

1828 

1829 # Also want to use two different dataset types to ensure that 

1830 # grouping works. 

1831 datasetTypeNames = ["random_data", "random_data_2"] 

1832 

1833 # Create the run collections in the source butler. 

1834 for run in runs: 

1835 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1836 

1837 # Create dimensions in source butler. 

1838 n_exposures = 30 

1839 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1840 self.source_butler.registry.insertDimensionData( 

1841 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1842 ) 

1843 self.source_butler.registry.insertDimensionData( 

1844 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1845 ) 

1846 

1847 for i in range(n_exposures): 

1848 self.source_butler.registry.insertDimensionData( 

1849 "exposure", 

1850 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1851 ) 

1852 

1853 # Create dataset types in the source butler. 

1854 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1855 for datasetTypeName in datasetTypeNames: 

1856 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1857 self.source_butler.registry.registerDatasetType(datasetType) 

1858 

1859 # Write a dataset to an unrelated run -- this will ensure that 

1860 # we are rewriting integer dataset IDs in the target if necessary. 

1861 # This is not relevant for UUIDs. 

1862 run = "distraction" 

1863 butler = Butler(butler=self.source_butler, run=run) 

1864 butler.put( 

1865 makeExampleMetrics(), 

1866 datasetTypeName, 

1867 exposure=1, 

1868 instrument="DummyCamComp", 

1869 physical_filter="d-r", 

1870 ) 

1871 

1872 # Write some example metrics to the source 

1873 butler = Butler(butler=self.source_butler) 

1874 

1875 # Set of DatasetRefs that should be in the list of refs to transfer 

1876 # but which will not be transferred. 

1877 deleted = set() 

1878 

1879 n_expected = 20 # Number of datasets expected to be transferred 

1880 source_refs = [] 

1881 for i in range(n_exposures): 

1882 # Put a third of the datasets into each collection; only retain 

1883 # two thirds of them. 

1884 index = i % 3 

1885 run = runs[index] 

1886 datasetTypeName = datasetTypeNames[i % 2] 

1887 

1888 metric_data = { 

1889 "summary": {"counter": i}, 

1890 "output": {"text": "metric"}, 

1891 "data": [2 * x for x in range(i)], 

1892 } 

1893 metric = MetricsExample(**metric_data) 

1894 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1895 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

1896 

1897 # Remove the datastore record using low-level API 

1898 if purge: 

1899 # Remove records for a fraction. 

1900 if index == 1: 

1901 # For one of these delete the file as well. 

1902 # This allows the "missing" code to filter the 

1903 # file out. 

1904 if not deleted: 

1905 primary, uris = butler.datastore.getURIs(ref) 

1906 if primary: 

1907 primary.remove() 

1908 for uri in uris.values(): 

1909 uri.remove() 

1910 n_expected -= 1 

1911 deleted.add(ref) 

1912 

1913 # Remove the datastore record. 

1914 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

1915 

1916 if index < 2: 

1917 source_refs.append(ref) 

1918 if ref not in deleted: 

1919 new_metric = butler.get(ref.unresolved(), collections=run) 

1920 self.assertEqual(new_metric, metric) 

1921 

1922 # Create some bad dataset types to ensure we check for inconsistent 

1923 # definitions. 

1924 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

1925 for datasetTypeName in datasetTypeNames: 

1926 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

1927 self.target_butler.registry.registerDatasetType(datasetType) 

1928 with self.assertRaises(ConflictingDefinitionError) as cm: 

1929 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

1930 self.assertIn("dataset type differs", str(cm.exception)) 

1931 

1932 # And remove the bad definitions. 

1933 for datasetTypeName in datasetTypeNames: 

1934 self.target_butler.registry.removeDatasetType(datasetTypeName) 

1935 

1936 # Transfer without creating dataset types should fail. 

1937 with self.assertRaises(KeyError): 

1938 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

1939 

1940 # Transfer without creating dimensions should fail. 

1941 with self.assertRaises(ConflictingDefinitionError) as cm: 

1942 self.target_butler.transfer_from( 

1943 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

1944 ) 

1945 self.assertIn("dimension", str(cm.exception)) 

1946 

1947 # The failed transfer above leaves the registry in an inconsistent 

1948 # state because the run is created but then rolled back without 

1949 # the collection cache being cleared. For now force a refresh. 

1950 # This can be removed with DM-35498. 

1951 self.target_butler.registry.refresh() 

1952 

1953 # Now transfer them to the second butler, including dimensions. 

1954 with self.assertLogs(level=logging.DEBUG) as cm: 

1955 transferred = self.target_butler.transfer_from( 

1956 self.source_butler, 

1957 source_refs, 

1958 id_gen_map=id_gen_map, 

1959 register_dataset_types=True, 

1960 transfer_dimensions=True, 

1961 ) 

1962 self.assertEqual(len(transferred), n_expected) 

1963 log_output = ";".join(cm.output) 

1964 self.assertIn("found in datastore for chunk", log_output) 

1965 self.assertIn("Creating output run", log_output) 

1966 

1967 # Do the transfer twice to ensure that it will do nothing extra. 

1968 # Only do this if purge=True because it does not work for int 

1969 # dataset_id. 

1970 if purge: 

1971 # This should not need to register dataset types. 

1972 transferred = self.target_butler.transfer_from( 

1973 self.source_butler, source_refs, id_gen_map=id_gen_map 

1974 ) 

1975 self.assertEqual(len(transferred), n_expected) 

1976 

1977 # Also do an explicit low-level transfer to trigger some 

1978 # edge cases. 

1979 with self.assertLogs(level=logging.DEBUG) as cm: 

1980 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

1981 log_output = ";".join(cm.output) 

1982 self.assertIn("no file artifacts exist", log_output) 

1983 

1984 with self.assertRaises(TypeError): 

1985 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

1986 

1987 with self.assertRaises(ValueError): 

1988 self.target_butler.datastore.transfer_from( 

1989 self.source_butler.datastore, source_refs, transfer="split" 

1990 ) 

1991 

1992 # Now try to get the same refs from the new butler. 

1993 for ref in source_refs: 

1994 if ref not in deleted: 

1995 unresolved_ref = ref.unresolved() 

1996 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

1997 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

1998 self.assertEqual(new_metric, old_metric) 

1999 

2000 # Now prune run2 collection and create instead a CHAINED collection. 

2001 # This should block the transfer. 

2002 self.target_butler.removeRuns(["run2"], unstore=True) 

2003 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2004 with self.assertRaises(CollectionTypeError): 

2005 # Re-importing the run1 datasets can be problematic if they 

2006 # use integer IDs, so filter those out. 

2007 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2008 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2009 

2010 
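# Illustrative sketch, not part of the covered file: the transfer_from call
# pattern exercised by assertButlerTransfers above. The helper name is
# hypothetical; the keyword arguments are those used in the test.
def demo_transfer(source: Butler, target: Butler, refs) -> int:
    transferred = target.transfer_from(
        source,
        refs,
        register_dataset_types=True,  # create any missing dataset types
        transfer_dimensions=True,  # copy needed dimension records too
    )
    return len(transferred)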

2011 if __name__ == "__main__": 2011 ↛ 2012 (line 2011 didn't jump to line 2012, because the condition on line 2011 was never true)

2012 unittest.main()