# tests/test_butler.py

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # Used via unittest.mock.patch.dict in testConstructor.

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    """Return an example MetricsExample with summary, output, and data."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
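
# A minimal usage sketch for the tests below (names here are illustrative,
# assuming a butler with a "test_metric" dataset type registered):
#
#     metric = makeExampleMetrics()
#     ref = butler.put(metric, "test_metric", {"instrument": "DummyCamComp", "visit": 423})
#     assert butler.get(ref) == metric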


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to run against
    different butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        """Check that each named component retrieved via get() matches the
        corresponding attribute of the reference object, both directly and
        through a deferred handle."""
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be put into the default run collection; the loop
        # below also creates a distinct run collection per subtest so that
        # the attempts cannot clash with each other.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)
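        # (id=None makes this an unresolved ref: put() assigns the dataset ID
        # and returns a resolved ref. A ref constructed with a preexisting id
        # is rejected, as the next assertion checks.)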

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test get via the resolved ref
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # get() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.get(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
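
# Note for implementers: concrete subclasses of ButlerTests supply class
# attributes consumed by the tests below, e.g. configFile, fullConfigKey,
# validationCanFail, datastoreStr, datastoreName, and registryStr (see
# PosixDatastoreButlerTestCase near the end of this file).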

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
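        # The index maps repository labels to config URIs; with the YAML
        # serialization used below its contents would look roughly like
        # this (paths here are illustrative):
        #
        #     label: /path/to/repo/butler.yaml
        #     bad_label: s3://bucket/not_real.yaml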

        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the incompatible dataset type definition, this
        # time passing the DatasetType and data ID rather than a resolved
        # ref. The behavior should be consistent with the ref-based get()
        # above and return the python type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
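        # (MultiDetectorFormatter is a test formatter that, roughly speaking,
        # reads a single multi-detector YAML file and extracts the subset
        # matching each ref's detector, which is what lets several refs
        # share one ingested file.)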

        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get via the resolved ref
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
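
        # (standalone=True asks makeRepo to write a self-contained config
        # with the defaults expanded into it, so the repo at root2 does not
        # depend on the daf_butler default configuration at read time.)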

        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create some almost-identical DatasetTypes (metric1 and metric2 will
        # use the default template; metric3 is exercised against a bad
        # template at the end of this test)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
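        # (In file templates a trailing ":?" marks a field as optional: a
        # record attribute that does not exist is simply dropped from the
        # formatted path, as asserted below, whereas the same typo without
        # ":?" raises KeyError.)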

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix
        datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])

        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

        with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
            butler.registry.removeDatasetType(tuple(["test*", "test*"]))
        self.assertIn("not defined", "\n".join(cm.output))


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using several transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

1354 def testPruneDatasets(self): 

1355 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1356 butler = Butler(self.tmpConfigFile, writeable=True) 

1357 # Load registry data with dimensions to hang datasets off of. 

1358 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1359 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1360 # Add some RUN-type collections. 

1361 run1 = "run1" 

1362 butler.registry.registerRun(run1) 

1363 run2 = "run2" 

1364 butler.registry.registerRun(run2) 

1365 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1366 # different runs. ref3 has a different data ID. 

1367 metric = makeExampleMetrics() 

1368 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1369 datasetType = self.addDatasetType( 

1370 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1371 ) 

1372 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1373 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1374 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1375 

1376 # Simple prune. 

1377 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1378 with self.assertRaises(LookupError): 

1379 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1380 

1381 # Put data back. 

1382 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1383 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1384 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1385 

1386 # Check that in normal mode, deleting the record will lead to 

1387 # trash not touching the file. 

1388 uri1 = butler.datastore.getURI(ref1) 

1389 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1390 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1391 butler.datastore.trash(ref1) 

1392 butler.datastore.emptyTrash() 

1393 self.assertTrue(uri1.exists()) 

1394 uri1.remove() # Clean it up. 

1395 

1396 # Simulate execution butler setup by deleting the datastore 

1397 # record but keeping the file around and trusting. 
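# With trustGetRequest enabled the datastore falls back to checking
# for file artifacts it has no record of, rather than failing
# immediately.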

1398 butler.datastore.trustGetRequest = True 

1399 uri2 = butler.datastore.getURI(ref2) 

1400 uri3 = butler.datastore.getURI(ref3) 

1401 self.assertTrue(uri2.exists()) 

1402 self.assertTrue(uri3.exists()) 

1403 

1404 # Remove the datastore record. 

1405 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1406 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1407 self.assertTrue(uri2.exists()) 

1408 butler.datastore.trash([ref2, ref3]) 

1409 # Immediate removal for ref2 file 

1410 self.assertFalse(uri2.exists()) 

1411 # But ref3 has to wait for the empty. 

1412 self.assertTrue(uri3.exists()) 

1413 butler.datastore.emptyTrash() 

1414 self.assertFalse(uri3.exists()) 

1415 

1416 # Clear out the datasets from registry. 

1417 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1418 

1419 def testPytypeCoercion(self): 

1420 """Test python type coercion on Butler.get and put.""" 

1421 

1422 # Store some data with the normal example storage class. 

1423 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1424 datasetTypeName = "test_metric" 

1425 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1426 

1427 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1428 metric = butler.get(datasetTypeName, dataId=dataId) 

1429 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1430 

1431 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1432 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1433 

1434 # Now need to hack the registry dataset type definition. 

1435 # There is no API for this. 

1436 manager = butler.registry._managers.datasets 

1437 manager._db.update( 

1438 manager._static.dataset_type, 

1439 {"name": datasetTypeName}, 

1440 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1441 ) 

1442 

1443 # Force reset of dataset type cache 

1444 butler.registry.refresh() 

1445 

1446 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1447 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1448 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1449 

1450 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1451 self.assertNotEqual(type(metric_model), type(metric)) 

1452 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1453 

1454 # Put the model and read it back to show that everything now 

1455 # works as normal. 

1456 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1457 metric_model_new = butler.get(metric_ref) 

1458 self.assertEqual(metric_model_new, metric_model) 

1459 

1460 # Hack the storage class again to something that will fail on the 

1461 # get with no conversion class. 

1462 manager._db.update( 

1463 manager._static.dataset_type, 

1464 {"name": datasetTypeName}, 

1465 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1466 ) 

1467 butler.registry.refresh() 

1468 

1469 with self.assertRaises(ValueError): 

1470 butler.get(datasetTypeName, dataId=dataId) 

1471 

1472 

1473@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1474class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1475 """PosixDatastore specialization of a butler using Postgres""" 

1476 

1477 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1478 fullConfigKey = ".datastore.formatters" 

1479 validationCanFail = True 

1480 datastoreStr = ["/tmp"] 

1481 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1482 registryStr = "PostgreSQL@test" 

1483 

1484 @staticmethod 

1485 def _handler(postgresql): 
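# The Butler Postgres schema uses exclusion constraints on timespan
# columns which, to our understanding, require the btree_gist
# extension, so install it once in the cached template database.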

1486 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1487 with engine.begin() as connection: 

1488 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1489 

1490 @classmethod 

1491 def setUpClass(cls): 

1492 # Create the postgres test server. 

1493 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1494 cache_initialized_db=True, on_initialized=cls._handler 

1495 ) 

1496 super().setUpClass() 

1497 

1498 @classmethod 

1499 def tearDownClass(cls): 

1500 # Clean up any lingering SQLAlchemy engines/connections 

1501 # so they're closed before we shut down the server. 

1502 gc.collect() 

1503 cls.postgresql.clear_cache() 

1504 super().tearDownClass() 

1505 

1506 def setUp(self): 

1507 self.server = self.postgresql() 

1508 

1509 # Need to add a registry section to the config. 

1510 self._temp_config = False 

1511 config = Config(self.configFile) 

1512 config["registry", "db"] = self.server.url() 

1513 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1514 config.dump(fh) 

1515 self.configFile = fh.name 

1516 self._temp_config = True 

1517 super().setUp() 

1518 

1519 def tearDown(self): 

1520 self.server.stop() 

1521 if self._temp_config and os.path.exists(self.configFile): 

1522 os.remove(self.configFile) 

1523 super().tearDown() 

1524 

1525 def testMakeRepo(self): 

1526 # The base class test assumes that it's using sqlite and assumes 

1527 # the config file is acceptable to sqlite. 

1528 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1529 

1530 

1531class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1532 """InMemoryDatastore specialization of a butler""" 

1533 

1534 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1535 fullConfigKey = None 

1536 useTempRoot = False 

1537 validationCanFail = False 

1538 datastoreStr = ["datastore='InMemory"] 

1539 datastoreName = ["InMemoryDatastore@"] 

1540 registryStr = "/gen3.sqlite3" 

1541 

1542 def testIngest(self): 
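# File ingest is not meaningful for an in-memory datastore (there are
# no file artifacts to ingest), so override the inherited test with a
# no-op.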

1543 pass 

1544 

1545 

1546class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1547 """PosixDatastore specialization""" 

1548 

1549 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1550 fullConfigKey = ".datastore.datastores.1.formatters" 

1551 validationCanFail = True 

1552 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1553 datastoreName = [ 

1554 "InMemoryDatastore@", 

1555 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1556 "SecondDatastore", 

1557 ] 

1558 registryStr = "/gen3.sqlite3" 

1559 

1560 

1561class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1562 """Test that a yaml file in one location can refer to a root in another.""" 

1563 

1564 datastoreStr = ["dir1"] 

1565 # Disable the makeRepo test since we are deliberately not using 

1566 # butler.yaml as the config name. 

1567 fullConfigKey = None 

1568 

1569 def setUp(self): 

1570 self.root = makeTestTempDir(TESTDIR) 

1571 

1572 # Make a new repository in one place 

1573 self.dir1 = os.path.join(self.root, "dir1") 

1574 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1575 

1576 # Move the yaml file to a different place and add a "root" 

1577 self.dir2 = os.path.join(self.root, "dir2") 

1578 os.makedirs(self.dir2, exist_ok=True) 

1579 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1580 config = Config(configFile1) 

1581 config["root"] = self.dir1 

1582 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1583 config.dumpToUri(configFile2) 

1584 os.remove(configFile1) 

1585 self.tmpConfigFile = configFile2 

1586 

1587 def testFileLocations(self): 

1588 self.assertNotEqual(self.dir1, self.dir2) 

1589 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1590 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1591 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1592 

1593 

1594class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1595 """Test that a config file created by makeRepo outside of repo works.""" 

1596 

1597 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1598 

1599 def setUp(self): 

1600 self.root = makeTestTempDir(TESTDIR) 

1601 self.root2 = makeTestTempDir(TESTDIR) 

1602 

1603 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1604 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1605 

1606 def tearDown(self): 

1607 if os.path.exists(self.root2): 

1608 shutil.rmtree(self.root2, ignore_errors=True) 

1609 super().tearDown() 

1610 

1611 def testConfigExistence(self): 

1612 c = Config(self.tmpConfigFile) 

1613 uri_config = ResourcePath(c["root"]) 

1614 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1615 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1616 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1617 

1618 def testPutGet(self): 

1619 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1620 self.runPutGetTest(storageClass, "test_metric") 

1621 

1622 

1623class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1624 """Test that a config file created by makeRepo outside of repo works.""" 

1625 

1626 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1627 

1628 def setUp(self): 

1629 self.root = makeTestTempDir(TESTDIR) 

1630 self.root2 = makeTestTempDir(TESTDIR) 

1631 

1632 self.tmpConfigFile = self.root2 

1633 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1634 

1635 def testConfigExistence(self): 

1636 # Append the yaml file else Config constructor does not know the file 

1637 # type. 

1638 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1639 super().testConfigExistence() 

1640 

1641 

1642class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1643 """Test that a config file created by makeRepo outside of repo works.""" 

1644 

1645 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1646 

1647 def setUp(self): 

1648 self.root = makeTestTempDir(TESTDIR) 

1649 self.root2 = makeTestTempDir(TESTDIR) 

1650 

1651 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1652 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1653 

1654 

1655@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1656class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1657 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1658 a local in-memory SqlRegistry. 

1659 """ 

1660 

1661 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1662 fullConfigKey = None 

1663 validationCanFail = True 

1664 

1665 bucketName = "anybucketname" 

1666 """Name of the Bucket that will be used in the tests. The name is read from 

1667 the config file used with the tests during set-up. 

1668 """ 

1669 

1670 root = "butlerRoot/" 

1671 """Root repository directory expected to be used in case useTempRoot=False. 

1672 Otherwise the root is set to a 20 characters long randomly generated string 

1673 during set-up. 

1674 """ 

1675 

1676 datastoreStr = [f"datastore={root}"] 

1677 """Contains all expected root locations in a format expected to be 

1678 returned by Butler stringification. 

1679 """ 

1680 

1681 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1682 """The expected format of the S3 Datastore string.""" 

1683 

1684 registryStr = "/gen3.sqlite3" 

1685 """Expected format of the Registry string.""" 

1686 

1687 mock_s3 = mock_s3() 

1688 """The mocked s3 interface from moto.""" 

1689 

1690 def genRoot(self): 

1691 """Returns a random string of len 20 to serve as a root 

1692 name for the temporary bucket repo. 

1693 

1694 This is equivalent to tempfile.mkdtemp as this is what self.root 

1695 becomes when useTempRoot is True. 

1696 """ 

1697 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1698 return rndstr + "/" 

1699 

1700 def setUp(self): 

1701 config = Config(self.configFile) 

1702 uri = ResourcePath(config[".datastore.datastore.root"]) 

1703 self.bucketName = uri.netloc 

1704 

1705 # Enable S3 mocking of tests. 

1706 self.mock_s3.start() 

1707 

1708 # set up some fake credentials if they do not exist 

1709 self.usingDummyCredentials = setAwsEnvCredentials() 

1710 

1711 if self.useTempRoot: 

1712 self.root = self.genRoot() 

1713 rooturi = f"s3://{self.bucketName}/{self.root}" 

1714 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1715 

1716 # need local folder to store registry database 

1717 self.reg_dir = makeTestTempDir(TESTDIR) 

1718 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1719 

1720 # Moto needs to know that we expect the bucket to exist 
1721 # (this used to be the class attribute bucketName). 

1722 s3 = boto3.resource("s3") 

1723 s3.create_bucket(Bucket=self.bucketName) 

1724 

1725 self.datastoreStr = f"datastore={self.root}" 

1726 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1727 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1728 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1729 

1730 def tearDown(self): 

1731 s3 = boto3.resource("s3") 

1732 bucket = s3.Bucket(self.bucketName) 

1733 try: 

1734 bucket.objects.all().delete() 

1735 except botocore.exceptions.ClientError as e: 

1736 if e.response["Error"]["Code"] == "404": 

1737 # the key was not reachable - pass 

1738 pass 

1739 else: 

1740 raise 

1741 

1742 bucket = s3.Bucket(self.bucketName) 

1743 bucket.delete() 

1744 

1745 # Stop the S3 mock. 

1746 self.mock_s3.stop() 

1747 

1748 # unset any potentially set dummy credentials 

1749 if self.usingDummyCredentials: 

1750 unsetAwsEnvCredentials() 

1751 

1752 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1753 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1754 

1755 if self.useTempRoot and os.path.exists(self.root): 

1756 shutil.rmtree(self.root, ignore_errors=True) 

1757 

1758 super().tearDown() 

1759 

1760 

1761class PosixDatastoreTransfers(unittest.TestCase): 

1762 """Test data transfers between butlers. 

1763 

1764 Different dataset-ID managers are exercised: UUID to UUID and integer 
1765 to integer are tested. UUID to integer is not supported since we do not 
1766 currently want to allow that. Integer to UUID is supported, with the 
1767 caveat that a UUID4 will be generated, which is incorrect for raw 
1768 dataset types; the tests ignore that. 

1769 """ 

1770 

1771 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1772 

1773 @classmethod 

1774 def setUpClass(cls): 

1775 cls.storageClassFactory = StorageClassFactory() 

1776 cls.storageClassFactory.addFromConfig(cls.configFile) 

1777 

1778 def setUp(self): 

1779 self.root = makeTestTempDir(TESTDIR) 

1780 self.config = Config(self.configFile) 

1781 

1782 def tearDown(self): 

1783 removeTestTempDir(self.root) 

1784 

1785 def create_butler(self, manager, label): 

1786 config = Config(self.configFile) 

1787 config["registry", "managers", "datasets"] = manager 

1788 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1789 

1790 def create_butlers(self, manager1=None, manager2=None): 

1791 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

1792 if manager1 is None: 

1793 manager1 = default 

1794 if manager2 is None: 

1795 manager2 = default 

1796 self.source_butler = self.create_butler(manager1, "1") 

1797 self.target_butler = self.create_butler(manager2, "2") 

1798 

1799 def testTransferUuidToUuid(self): 

1800 self.create_butlers() 

1801 # Setting id_gen_map should have no effect here 

1802 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1803 

1804 def _enable_trust(self, datastore) -> None: 

1805 if hasattr(datastore, "trustGetRequest"): 

1806 datastore.trustGetRequest = True 

1807 elif hasattr(datastore, "datastores"): 

1808 for child in datastore.datastores: 
1809 if hasattr(child, "trustGetRequest"): 
1810 child.trustGetRequest = True 

1811 

1812 def testTransferMissing(self): 

1813 """Test transfers where datastore records are missing. 

1814 

1815 This is how execution butler works. 

1816 """ 

1817 self.create_butlers() 

1818 

1819 # Configure the source butler to allow trust. 

1820 self._enable_trust(self.source_butler.datastore) 

1821 

1822 self.assertButlerTransfers(purge=True) 

1823 

1824 def testTransferMissingDisassembly(self): 

1825 """Test transfers where datastore records are missing. 

1826 

1827 This is how execution butler works. 

1828 """ 

1829 self.create_butlers() 

1830 

1831 # Configure the source butler to allow trust. 

1832 self._enable_trust(self.source_butler.datastore) 

1833 

1834 # Test disassembly. 

1835 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1836 

1837 def testAbsoluteURITransferDirect(self): 

1838 """Test transfer using an absolute URI.""" 

1839 self._absolute_transfer("auto") 

1840 

1841 def testAbsoluteURITransferCopy(self): 

1842 """Test transfer using an absolute URI.""" 

1843 self._absolute_transfer("copy") 

1844 

1845 def _absolute_transfer(self, transfer): 

1846 self.create_butlers() 

1847 

1848 storageClassName = "StructuredData" 

1849 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1850 datasetTypeName = "random_data" 

1851 runs = ["run1", "run2"] 

1852 for run in runs: 

1853 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1854 

1855 dimensions = self.source_butler.registry.dimensions.extract(()) 

1856 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1857 self.source_butler.registry.registerDatasetType(datasetType) 

1858 

1859 metrics = makeExampleMetrics() 

1860 with ResourcePath.temporary_uri(suffix=".json") as temp: 
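# The dataset type was registered with no dimensions above, so the
# empty data ID is the only valid one.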

1861 source_refs = [DatasetRef(datasetType, {})] 

1862 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

1863 dataset = FileDataset(path=temp, refs=source_refs) 
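# A "direct" ingest records the absolute URI in the datastore without
# copying the file into the repository root (our reading of the
# ingest transfer modes).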

1864 self.source_butler.ingest(dataset, transfer="direct", run="run1") 

1865 

1866 self.target_butler.transfer_from( 

1867 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

1868 ) 

1869 

1870 uri = self.target_butler.getURI(dataset.refs[0]) 

1871 if transfer == "auto": 

1872 self.assertEqual(uri, temp) 

1873 else: 

1874 self.assertNotEqual(uri, temp) 

1875 

1876 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1877 """Test that a run can be transferred to another butler.""" 

1878 

1879 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1880 datasetTypeName = "random_data" 

1881 

1882 # Test will create 3 collections and we will want to transfer 

1883 # two of those three. 

1884 runs = ["run1", "run2", "other"] 

1885 

1886 # Also want to use two different dataset types to ensure that 

1887 # grouping works. 

1888 datasetTypeNames = ["random_data", "random_data_2"] 

1889 

1890 # Create the run collections in the source butler. 

1891 for run in runs: 

1892 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1893 

1894 # Create dimensions in source butler. 

1895 n_exposures = 30 

1896 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1897 self.source_butler.registry.insertDimensionData( 

1898 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1899 ) 

1900 self.source_butler.registry.insertDimensionData( 

1901 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1902 ) 

1903 

1904 for i in range(n_exposures): 

1905 self.source_butler.registry.insertDimensionData( 

1906 "exposure", 

1907 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1908 ) 

1909 

1910 # Create dataset types in the source butler. 

1911 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1912 for datasetTypeName in datasetTypeNames: 

1913 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1914 self.source_butler.registry.registerDatasetType(datasetType) 

1915 

1916 # Write a dataset to an unrelated run -- this will ensure that 

1917 # we are rewriting integer dataset ids in the target if necessary. 

1918 # Will not be relevant for UUID. 

1919 run = "distraction" 

1920 butler = Butler(butler=self.source_butler, run=run) 

1921 butler.put( 

1922 makeExampleMetrics(), 

1923 datasetTypeName, 

1924 exposure=1, 

1925 instrument="DummyCamComp", 

1926 physical_filter="d-r", 

1927 ) 

1928 

1929 # Write some example metrics to the source 

1930 butler = Butler(butler=self.source_butler) 

1931 

1932 # Set of DatasetRefs that should be in the list of refs to transfer 

1933 # but which will not be transferred. 

1934 deleted = set() 

1935 

1936 n_expected = 20 # Number of datasets expected to be transferred 

1937 source_refs = [] 

1938 for i in range(n_exposures): 

1939 # Put a third of datasets into each collection, only retain 

1940 # two thirds. 

1941 index = i % 3 

1942 run = runs[index] 

1943 datasetTypeName = datasetTypeNames[i % 2] 

1944 

1945 metric_data = { 

1946 "summary": {"counter": i}, 

1947 "output": {"text": "metric"}, 

1948 "data": [2 * x for x in range(i)], 

1949 } 

1950 metric = MetricsExample(**metric_data) 

1951 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1952 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

1953 

1954 # Remove the datastore record using low-level API 

1955 if purge: 

1956 # Remove records for a fraction. 

1957 if index == 1: 

1958 # For one of these delete the file as well. 

1959 # This allows the "missing" code to filter the 

1960 # file out. 

1961 # Access the individual datastores. 

1962 datastores = [] 

1963 if hasattr(butler.datastore, "datastores"): 

1964 datastores.extend(butler.datastore.datastores) 

1965 else: 

1966 datastores.append(butler.datastore) 

1967 

1968 if not deleted: 

1969 # For a chained datastore we need to remove 

1970 # files in each chain. 

1971 for datastore in datastores: 

1972 # The file might not be known to the datastore 

1973 # if constraints are used. 

1974 try: 

1975 primary, uris = datastore.getURIs(ref) 

1976 except FileNotFoundError: 

1977 continue 

1978 if primary: 

1979 if primary.scheme != "mem": 

1980 primary.remove() 

1981 for uri in uris.values(): 

1982 if uri.scheme != "mem": 

1983 uri.remove() 

1984 n_expected -= 1 

1985 deleted.add(ref) 

1986 

1987 # Remove the datastore record. 

1988 for datastore in datastores: 

1989 if hasattr(datastore, "removeStoredItemInfo"): 

1990 datastore.removeStoredItemInfo(ref) 

1991 

1992 if index < 2: 

1993 source_refs.append(ref) 

1994 if ref not in deleted: 

1995 new_metric = butler.get(ref.unresolved(), collections=run) 

1996 self.assertEqual(new_metric, metric) 

1997 

1998 # Create some bad dataset types to ensure we check for inconsistent 

1999 # definitions. 

2000 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2001 for datasetTypeName in datasetTypeNames: 

2002 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2003 self.target_butler.registry.registerDatasetType(datasetType) 

2004 with self.assertRaises(ConflictingDefinitionError) as cm: 

2005 self.target_butler.transfer_from(self.source_butler, source_refs) 

2006 self.assertIn("dataset type differs", str(cm.exception)) 

2007 

2008 # And remove the bad definitions. 

2009 for datasetTypeName in datasetTypeNames: 

2010 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2011 

2012 # Transfer without creating dataset types should fail. 

2013 with self.assertRaises(KeyError): 

2014 self.target_butler.transfer_from(self.source_butler, source_refs) 

2015 

2016 # Transfer without creating dimensions should fail. 

2017 with self.assertRaises(ConflictingDefinitionError) as cm: 

2018 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2019 self.assertIn("dimension", str(cm.exception)) 

2020 

2021 # The failed transfer above leaves registry in an inconsistent 

2022 # state because the run is created but then rolled back without 

2023 # the collection cache being cleared. For now force a refresh. 

2024 # Can remove with DM-35498. 

2025 self.target_butler.registry.refresh() 

2026 

2027 # Now transfer them to the second butler, including dimensions. 

2028 with self.assertLogs(level=logging.DEBUG) as cm: 

2029 transferred = self.target_butler.transfer_from( 

2030 self.source_butler, 

2031 source_refs, 

2032 register_dataset_types=True, 

2033 transfer_dimensions=True, 

2034 ) 

2035 self.assertEqual(len(transferred), n_expected) 

2036 log_output = ";".join(cm.output) 

2037 

2038 # A ChainedDatastore will use the in-memory datastore for mexists 

2039 # so we can not rely on the mexists log message. 

2040 self.assertIn("Number of datastore records found in source", log_output) 

2041 self.assertIn("Creating output run", log_output) 

2042 

2043 # Do the transfer twice to ensure that it will do nothing extra. 

2044 # Only do this if purge=True because it does not work for int 

2045 # dataset_id. 

2046 if purge: 

2047 # This should not need to register dataset types. 

2048 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2049 self.assertEqual(len(transferred), n_expected) 

2050 

2051 # Also do an explicit low-level transfer to trigger some 

2052 # edge cases. 

2053 with self.assertLogs(level=logging.DEBUG) as cm: 

2054 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2055 log_output = ";".join(cm.output) 

2056 self.assertIn("no file artifacts exist", log_output) 

2057 

2058 with self.assertRaises((TypeError, AttributeError)): 

2059 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2060 

2061 with self.assertRaises(ValueError): 

2062 self.target_butler.datastore.transfer_from( 

2063 self.source_butler.datastore, source_refs, transfer="split" 

2064 ) 

2065 

2066 # Now try to get the same refs from the new butler. 

2067 for ref in source_refs: 

2068 if ref not in deleted: 

2069 unresolved_ref = ref.unresolved() 

2070 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2071 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2072 self.assertEqual(new_metric, old_metric) 

2073 

2074 # Now prune run2 collection and create instead a CHAINED collection. 

2075 # This should block the transfer. 

2076 self.target_butler.removeRuns(["run2"], unstore=True) 

2077 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 
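# Datasets can only be written into RUN collections, so transferring
# into the name that is now registered as CHAINED must fail.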

2078 with self.assertRaises(CollectionTypeError): 

2079 # Re-importing the run1 datasets can be problematic if they 

2080 # use integer IDs so filter those out. 

2081 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2082 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2083 

2084 

2085class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2086 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2087 

2088 

2089if __name__ == "__main__": 

2090 unittest.main()