Coverage for tests/test_butler.py: 12%

1147 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # Needed for unittest.mock.patch.dict used below.

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls
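
# (Added note, illustrative) The no-op fallback above lets the decorator be
# applied unconditionally, e.g.:
#
#     @mock_s3
#     class ButlerS3TestCase(unittest.TestCase):
#         ...
#
# When moto is importable the real mock_s3 intercepts boto3 S3 calls; when it
# is not, the class is returned unchanged and the S3-dependent tests are
# expected to be skipped elsewhere (e.g. via a check that boto3 is None).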


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
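
# (Added note, illustrative) The tests below exercise the three pieces of the
# object returned here separately, assuming MetricsExample stores its
# constructor arguments as attributes in this order:
#
#     metric.summary -> {"AM1": 5.2, "AM2": 30.6}
#     metric.output  -> {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}
#     metric.data    -> [563, 234, 456.7, 752, 8, 9, 27]
#
# assertGetComponents() and the slicing assertions rely on exactly these
# attribute names.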


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to run against
    different butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType
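
    # (Added note) create_butler() seeds the minimal dimension-record chain a
    # dataset with dimensions {instrument, visit} needs before a put can
    # succeed: instrument first, then physical_filter and visit_system (both
    # refer to the instrument), then visit (which refers to both). Inserting
    # them out of order would fail, since each record depends on the ones
    # inserted before it.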

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test get using the DatasetRef directly
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler


    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
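
    # (Added note, illustrative) The repository index consulted through the
    # DAF_BUTLER_REPOSITORY_INDEX environment variable above is just a small
    # YAML or JSON mapping of alias -> butler config URI, mirroring the
    # Config built in testConstructor:
    #
    #     label: /path/to/repo/butler.yaml
    #     bad_label: s3://bucket/not_real.yaml
    #
    # Butler("label") then resolves the alias before loading the config.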

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test Python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again, this time passing the compatible-but-different
        # DatasetType itself to get() rather than a DatasetRef. This should
        # be consistent with the DatasetRef behavior and return the python
        # type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run)
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, **new_data_id, collections=ref.run)
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            butler._allow_put_of_predefined_dataset = True

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get using the DatasetRef directly
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)
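
        # (Added note) Everything inside the butler.transaction() block above
        # (the dimension inserts, the registry rows from the put, and the
        # datastore write) is rolled back as a unit when TransactionTestError
        # propagates, which is why every follow-up lookup must fail.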

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at the given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
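
        # (Added note) The two FileTemplate instances above demonstrate the
        # optional-field syntax: "{visit.namex:?}" tolerates the deliberately
        # misspelled record attribute (the field is simply dropped from the
        # formatted path, with an INFO log message), whereas the same
        # placeholder without ":?" raises KeyError.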

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

1290 def testRemoveRuns(self): 

1291 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1292 butler = Butler(self.tmpConfigFile, writeable=True) 

1293 # Load registry data with dimensions to hang datasets off of. 

1294 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1295 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1296 # Add some RUN-type collection. 

1297 run1 = "run1" 

1298 butler.registry.registerRun(run1) 

1299 run2 = "run2" 

1300 butler.registry.registerRun(run2) 

1301 # put a dataset in each 

1302 metric = makeExampleMetrics() 

1303 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1304 datasetType = self.addDatasetType( 

1305 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1306 ) 

1307 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1308 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1309 uri1 = butler.getURI(ref1, collections=[run1]) 

1310 uri2 = butler.getURI(ref2, collections=[run2]) 

1311 

1312 with self.assertRaises(OrphanedRecordError): 

1313 butler.registry.removeDatasetType(datasetType.name) 

1314 

1315 # Remove from both runs with different values for unstore. 

1316 butler.removeRuns([run1], unstore=True) 

1317 butler.removeRuns([run2], unstore=False) 

1318 # Should be nothing in registry for either one, and datastore should 

1319 # not think either exists. 

1320 with self.assertRaises(MissingCollectionError): 

1321 butler.registry.getCollectionType(run1) 

1322 with self.assertRaises(MissingCollectionError): 

1323 butler.registry.getCollectionType(run2) 

1324 self.assertFalse(butler.datastore.exists(ref1)) 

1325 self.assertFalse(butler.datastore.exists(ref2)) 

1326 # The ref we unstored should be gone according to the URI, but the 

1327 # one we forgot should still be around. 

1328 self.assertFalse(uri1.exists()) 

1329 self.assertTrue(uri2.exists()) 

1330 

1331 # Now that the collections have been pruned we can remove the 

1332 # dataset type 

1333 butler.registry.removeDatasetType(datasetType.name) 

1334 

1335 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1336 butler.registry.removeDatasetType(("test*", "test*"))

1337 self.assertIn("not defined", "\n".join(cm.output)) 
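# Removing dataset types by wildcard is forgiving: as asserted above,

# names that match nothing are reported via an INFO log message

# ("not defined") rather than by raising an exception.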

1338 

1339 

1340class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1341 """PosixDatastore specialization of a butler""" 

1342 

1343 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1344 fullConfigKey = ".datastore.formatters" 

1345 validationCanFail = True 

1346 datastoreStr = ["/tmp"] 

1347 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1348 registryStr = "/gen3.sqlite3" 

1349 

1350 def testPathConstructor(self): 

1351 """Independent test of constructor using PathLike.""" 

1352 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1353 self.assertIsInstance(butler, Butler) 

1354 

1355 # And again with a Path object with the butler yaml 

1356 path = pathlib.Path(self.tmpConfigFile) 

1357 butler = Butler(path, writeable=False) 

1358 self.assertIsInstance(butler, Butler) 

1359 

1360 # And again with a Path object without the butler yaml 

1361 # (making sure we skip it if the tmp config doesn't end 

1362 # in butler.yaml -- which is the case for a subclass) 

1363 if self.tmpConfigFile.endswith("butler.yaml"): 

1364 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1365 butler = Butler(path, writeable=False) 

1366 self.assertIsInstance(butler, Butler) 

1367 

1368 def testExportTransferCopy(self): 

1369 """Test local export using all transfer modes""" 

1370 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1371 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1372 # Test that the repo actually has at least one dataset. 

1373 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1374 self.assertGreater(len(datasets), 0) 

1375 uris = [exportButler.getURI(d) for d in datasets] 

1376 datastoreRoot = exportButler.datastore.root 

1377 

1378 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1379 

1380 for path in pathsInStore: 

1381 # Assume local file system 

1382 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1383 

1384 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1385 with safeTestTempDir(TESTDIR) as exportDir: 

1386 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1387 export.saveDatasets(datasets) 

1388 for path in pathsInStore: 

1389 self.assertTrue( 

1390 self.checkFileExists(exportDir, path), 

1391 f"Check that mode {transfer} exported files", 

1392 ) 

1393 

1394 def testPruneDatasets(self): 

1395 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1396 butler = Butler(self.tmpConfigFile, writeable=True) 

1397 # Load registry data with dimensions to hang datasets off of. 

1398 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1399 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1400 # Add some RUN-type collections. 

1401 run1 = "run1" 

1402 butler.registry.registerRun(run1) 

1403 run2 = "run2" 

1404 butler.registry.registerRun(run2) 

1405 # Put some datasets. ref1 and ref2 have the same data ID, and are in

1406 # different runs. ref3 has a different data ID.

1407 metric = makeExampleMetrics() 

1408 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1409 datasetType = self.addDatasetType( 

1410 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1411 ) 

1412 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1413 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1414 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1415 

1416 # Simple prune. 

1417 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1418 with self.assertRaises(LookupError): 

1419 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1420 

1421 # Put data back. 

1422 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1423 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1424 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1425 

1426 # Check that in normal (non-trust) mode, deleting the datastore record

1427 # means that trashing and emptying the trash will not touch the file.

1428 uri1 = butler.datastore.getURI(ref1) 

1429 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1430 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1431 butler.datastore.trash(ref1) 

1432 butler.datastore.emptyTrash() 

1433 self.assertTrue(uri1.exists()) 

1434 uri1.remove() # Clean it up. 

1435 

1436 # Simulate execution butler setup by deleting the datastore 

1437 # record but keeping the file around and trusting. 

1438 butler.datastore.trustGetRequest = True 

1439 uri2 = butler.datastore.getURI(ref2) 

1440 uri3 = butler.datastore.getURI(ref3) 

1441 self.assertTrue(uri2.exists()) 

1442 self.assertTrue(uri3.exists()) 

1443 

1444 # Remove the datastore record. 

1445 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1446 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1447 self.assertTrue(uri2.exists()) 

1448 butler.datastore.trash([ref2, ref3]) 

1449 # Immediate removal for ref2 file 

1450 self.assertFalse(uri2.exists()) 

1451 # But ref3 has to wait for emptyTrash.

1452 self.assertTrue(uri3.exists()) 

1453 butler.datastore.emptyTrash() 

1454 self.assertFalse(uri3.exists()) 

1455 

1456 # Clear out the datasets from registry. 

1457 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1458 

1459 def testPytypeCoercion(self): 

1460 """Test python type coercion on Butler.get and put.""" 

1461 

1462 # Store some data with the normal example storage class. 

1463 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1464 datasetTypeName = "test_metric" 

1465 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1466 

1467 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1468 metric = butler.get(datasetTypeName, dataId=dataId) 

1469 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1470 

1471 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1472 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1473 

1474 # Now need to hack the registry dataset type definition. 

1475 # There is no API for this. 

1476 manager = butler.registry._managers.datasets 

1477 manager._db.update( 

1478 manager._static.dataset_type, 

1479 {"name": datasetTypeName}, 

1480 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1481 ) 
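# Note on the private Database.update API used above (an assumption,

# since it is not public): the second argument maps column names to the

# keys in the row dict holding their values, so {"name": datasetTypeName}

# together with the row key datasetTypeName supplies the WHERE value,

# while "storage_class" is the column actually being updated.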

1482 

1483 # Force reset of dataset type cache 

1484 butler.registry.refresh() 

1485 

1486 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1487 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1488 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1489 

1490 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1491 self.assertNotEqual(type(metric_model), type(metric)) 

1492 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1493 

1494 # Put the model and read it back to show that everything now 

1495 # works as normal. 

1496 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1497 metric_model_new = butler.get(metric_ref) 

1498 self.assertEqual(metric_model_new, metric_model) 

1499 

1500 # Hack the storage class again to something that will fail on the

1501 # get because no type conversion is possible.

1502 manager._db.update( 

1503 manager._static.dataset_type, 

1504 {"name": datasetTypeName}, 

1505 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1506 ) 

1507 butler.registry.refresh() 

1508 

1509 with self.assertRaises(ValueError): 

1510 butler.get(datasetTypeName, dataId=dataId) 

1511 

1512 

1513@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1514class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1515 """PosixDatastore specialization of a butler using Postgres""" 

1516 

1517 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1518 fullConfigKey = ".datastore.formatters" 

1519 validationCanFail = True 

1520 datastoreStr = ["/tmp"] 

1521 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1522 registryStr = "PostgreSQL@test" 

1523 

1524 @staticmethod 

1525 def _handler(postgresql): 

1526 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1527 with engine.begin() as connection: 

1528 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 
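# The btree_gist extension is presumably needed for the registry's

# exclusion constraints, which mix equality with range comparisons

# (e.g. temporal validity ranges); creating it in the initialization

# handler means every cached test database inherits it.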

1529 

1530 @classmethod 

1531 def setUpClass(cls): 

1532 # Create the postgres test server. 

1533 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1534 cache_initialized_db=True, on_initialized=cls._handler 

1535 ) 
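# cache_initialized_db=True is assumed to snapshot the database after

# on_initialized has run, so each server created in setUp starts from

# the cached state instead of re-running initdb and the handler.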

1536 super().setUpClass() 

1537 

1538 @classmethod 

1539 def tearDownClass(cls): 

1540 # Clean up any lingering SQLAlchemy engines/connections 

1541 # so they're closed before we shut down the server. 

1542 gc.collect() 

1543 cls.postgresql.clear_cache() 

1544 super().tearDownClass() 

1545 

1546 def setUp(self): 

1547 self.server = self.postgresql() 

1548 

1549 # Need to add a registry section to the config. 

1550 self._temp_config = False 

1551 config = Config(self.configFile) 

1552 config["registry", "db"] = self.server.url() 

1553 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1554 config.dump(fh) 

1555 self.configFile = fh.name 

1556 self._temp_config = True 

1557 super().setUp() 

1558 

1559 def tearDown(self): 

1560 self.server.stop() 

1561 if self._temp_config and os.path.exists(self.configFile): 

1562 os.remove(self.configFile) 

1563 super().tearDown() 

1564 

1565 def testMakeRepo(self): 

1566 # The base class test assumes that it is using SQLite and that

1567 # the config file is acceptable to SQLite.

1568 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1569 

1570 

1571class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1572 """InMemoryDatastore specialization of a butler""" 

1573 

1574 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1575 fullConfigKey = None 

1576 useTempRoot = False 

1577 validationCanFail = False 

1578 datastoreStr = ["datastore='InMemory"] 

1579 datastoreName = ["InMemoryDatastore@"] 

1580 registryStr = "/gen3.sqlite3" 

1581 

1582 def testIngest(self): 
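# Ingest requires file artifacts, which the in-memory datastore does

# not have, so the inherited ingest test is deliberately a no-op

# (assumed intent).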

1583 pass 

1584 

1585 

1586class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1587 """PosixDatastore specialization""" 

1588 

1589 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1590 fullConfigKey = ".datastore.datastores.1.formatters" 

1591 validationCanFail = True 

1592 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1593 datastoreName = [ 

1594 "InMemoryDatastore@", 

1595 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1596 "SecondDatastore", 

1597 ] 

1598 registryStr = "/gen3.sqlite3" 

1599 

1600 

1601class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1602 """Test that a yaml file in one location can refer to a root in another.""" 

1603 

1604 datastoreStr = ["dir1"] 

1605 # Disable the makeRepo test since we are deliberately not using 

1606 # butler.yaml as the config name. 

1607 fullConfigKey = None 

1608 

1609 def setUp(self): 

1610 self.root = makeTestTempDir(TESTDIR) 

1611 

1612 # Make a new repository in one place 

1613 self.dir1 = os.path.join(self.root, "dir1") 

1614 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1615 

1616 # Move the yaml file to a different place and add a "root" 

1617 self.dir2 = os.path.join(self.root, "dir2") 

1618 os.makedirs(self.dir2, exist_ok=True) 

1619 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1620 config = Config(configFile1) 

1621 config["root"] = self.dir1 

1622 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1623 config.dumpToUri(configFile2) 

1624 os.remove(configFile1) 

1625 self.tmpConfigFile = configFile2 

1626 

1627 def testFileLocations(self): 

1628 self.assertNotEqual(self.dir1, self.dir2) 

1629 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1630 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1631 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1632 

1633 

1634class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1635 """Test that a config file created by makeRepo outside of repo works.""" 

1636 

1637 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1638 

1639 def setUp(self): 

1640 self.root = makeTestTempDir(TESTDIR) 

1641 self.root2 = makeTestTempDir(TESTDIR) 

1642 

1643 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1644 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1645 

1646 def tearDown(self): 

1647 if os.path.exists(self.root2): 

1648 shutil.rmtree(self.root2, ignore_errors=True) 

1649 super().tearDown() 

1650 

1651 def testConfigExistence(self): 

1652 c = Config(self.tmpConfigFile) 

1653 uri_config = ResourcePath(c["root"]) 

1654 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1655 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1656 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1657 

1658 def testPutGet(self): 

1659 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1660 self.runPutGetTest(storageClass, "test_metric") 

1661 

1662 

1663class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1664 """Test that a config file created by makeRepo outside of repo works.""" 

1665 

1666 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1667 

1668 def setUp(self): 

1669 self.root = makeTestTempDir(TESTDIR) 

1670 self.root2 = makeTestTempDir(TESTDIR) 

1671 

1672 self.tmpConfigFile = self.root2 

1673 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1674 

1675 def testConfigExistence(self): 

1676 # Append the yaml file, otherwise the Config constructor does not know

1677 # the file type.

1678 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1679 super().testConfigExistence() 

1680 

1681 

1682class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1683 """Test that a config file created by makeRepo outside of repo works.""" 

1684 

1685 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1686 

1687 def setUp(self): 

1688 self.root = makeTestTempDir(TESTDIR) 

1689 self.root2 = makeTestTempDir(TESTDIR) 

1690 

1691 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1692 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1693 

1694 

1695@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1696class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1697 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1698 a local in-memory SqlRegistry. 

1699 """ 

1700 

1701 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1702 fullConfigKey = None 

1703 validationCanFail = True 

1704 

1705 bucketName = "anybucketname" 

1706 """Name of the Bucket that will be used in the tests. The name is read from 

1707 the config file used with the tests during set-up. 

1708 """ 

1709 

1710 root = "butlerRoot/" 

1711 """Root repository directory expected to be used in case useTempRoot=False. 

1712 Otherwise the root is set to a 20 characters long randomly generated string 

1713 during set-up. 

1714 """ 

1715 

1716 datastoreStr = [f"datastore={root}"] 

1717 """Contains all expected root locations in a format expected to be 

1718 returned by Butler stringification. 

1719 """ 

1720 

1721 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1722 """The expected format of the S3 Datastore string.""" 

1723 

1724 registryStr = "/gen3.sqlite3" 

1725 """Expected format of the Registry string.""" 

1726 

1727 mock_s3 = mock_s3() 

1728 """The mocked s3 interface from moto.""" 

1729 

1730 def genRoot(self): 

1731 """Returns a random string of len 20 to serve as a root 

1732 name for the temporary bucket repo. 

1733 

1734 This is equivalent to tempfile.mkdtemp as this is what self.root 

1735 becomes when useTempRoot is True. 

1736 """ 

1737 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1738 return rndstr + "/" 
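# The trailing slash marks the root as a directory-like S3 prefix.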

1739 

1740 def setUp(self): 

1741 config = Config(self.configFile) 

1742 uri = ResourcePath(config[".datastore.datastore.root"]) 

1743 self.bucketName = uri.netloc 

1744 

1745 # Enable S3 mocking of tests. 

1746 self.mock_s3.start() 

1747 

1748 # Set up some fake credentials if they do not exist.

1749 self.usingDummyCredentials = setAwsEnvCredentials() 
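# Moto intercepts boto3 calls in-process, but boto3 still insists on

# finding credentials, so dummy ones are injected when none are

# configured and removed again in tearDown.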

1750 

1751 if self.useTempRoot: 

1752 self.root = self.genRoot() 

1753 rooturi = f"s3://{self.bucketName}/{self.root}" 

1754 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1755 

1756 # Need a local folder to store the registry database.

1757 self.reg_dir = makeTestTempDir(TESTDIR) 

1758 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1759 

1760 # Moto needs to know that we expect the bucket to exist

1761 # (its name used to be the class attribute bucketName).

1762 s3 = boto3.resource("s3") 

1763 s3.create_bucket(Bucket=self.bucketName) 

1764 

1765 self.datastoreStr = f"datastore={self.root}" 

1766 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1767 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1768 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1769 

1770 def tearDown(self): 

1771 s3 = boto3.resource("s3") 

1772 bucket = s3.Bucket(self.bucketName) 

1773 try: 

1774 bucket.objects.all().delete() 

1775 except botocore.exceptions.ClientError as e: 

1776 if e.response["Error"]["Code"] == "404": 

1777 # the key was not reachable - pass 

1778 pass 

1779 else: 

1780 raise 

1781 

1782 bucket = s3.Bucket(self.bucketName) 

1783 bucket.delete() 

1784 

1785 # Stop the S3 mock. 

1786 self.mock_s3.stop() 

1787 

1788 # unset any potentially set dummy credentials 

1789 if self.usingDummyCredentials: 

1790 unsetAwsEnvCredentials() 

1791 

1792 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1793 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1794 

1795 if self.useTempRoot and os.path.exists(self.root): 

1796 shutil.rmtree(self.root, ignore_errors=True) 

1797 

1798 super().tearDown() 

1799 

1800 

1801class PosixDatastoreTransfers(unittest.TestCase): 

1802 """Test data transfers between butlers. 

1803 

1804 Tests different dataset ID managers. UUID to UUID and integer to

1805 integer are tested. UUID to integer is not supported, since we do not

1806 currently want to allow that. Integer to UUID is supported, with the

1807 caveat that a UUID4 will be generated, which is incorrect for raw

1808 dataset types; the test ignores that.

1809 """ 

1810 

1811 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1812 

1813 @classmethod 

1814 def setUpClass(cls): 

1815 cls.storageClassFactory = StorageClassFactory() 

1816 cls.storageClassFactory.addFromConfig(cls.configFile) 

1817 

1818 def setUp(self): 

1819 self.root = makeTestTempDir(TESTDIR) 

1820 self.config = Config(self.configFile) 

1821 

1822 def tearDown(self): 

1823 removeTestTempDir(self.root) 

1824 

1825 def create_butler(self, manager, label): 

1826 config = Config(self.configFile) 

1827 config["registry", "managers", "datasets"] = manager 

1828 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1829 

1830 def create_butlers(self, manager1=None, manager2=None): 

1831 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

1832 if manager1 is None: 

1833 manager1 = default 

1834 if manager2 is None: 

1835 manager2 = default 

1836 self.source_butler = self.create_butler(manager1, "1") 

1837 self.target_butler = self.create_butler(manager2, "2") 

1838 

1839 def testTransferUuidToUuid(self): 

1840 self.create_butlers() 

1841 # Setting id_gen_map should have no effect here 

1842 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1843 
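# Chained datastores expose their children via a .datastores attribute,

# so trust mode has to be enabled on each child individually; plain

# file datastores expose trustGetRequest directly.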

1844 def _enable_trust(self, datastore) -> None: 

1845 if hasattr(datastore, "trustGetRequest"): 

1846 datastore.trustGetRequest = True 

1847 elif hasattr(datastore, "datastores"): 

1848 for child in datastore.datastores: 

1849 if hasattr(child, "trustGetRequest"): 

1850 child.trustGetRequest = True 

1851 

1852 def testTransferMissing(self): 

1853 """Test transfers where datastore records are missing. 

1854 

1855 This is how execution butler works. 

1856 """ 

1857 self.create_butlers() 

1858 

1859 # Configure the source butler to allow trust. 

1860 self._enable_trust(self.source_butler.datastore) 

1861 

1862 self.assertButlerTransfers(purge=True) 

1863 

1864 def testTransferMissingDisassembly(self): 

1865 """Test transfers where datastore records are missing. 

1866 

1867 This is how execution butler works. 

1868 """ 

1869 self.create_butlers() 

1870 

1871 # Configure the source butler to allow trust. 

1872 self._enable_trust(self.source_butler.datastore) 

1873 

1874 # Test disassembly. 

1875 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1876 

1877 def testAbsoluteURITransferDirect(self): 

1878 """Test transfer using an absolute URI.""" 

1879 self._absolute_transfer("auto") 

1880 

1881 def testAbsoluteURITransferCopy(self): 

1882 """Test transfer using an absolute URI.""" 

1883 self._absolute_transfer("copy") 

1884 

1885 def _absolute_transfer(self, transfer): 

1886 self.create_butlers() 

1887 

1888 storageClassName = "StructuredData" 

1889 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1890 datasetTypeName = "random_data" 

1891 run = "run1" 

1892 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1893 

1894 dimensions = self.source_butler.registry.dimensions.extract(()) 

1895 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1896 self.source_butler.registry.registerDatasetType(datasetType) 

1897 

1898 metrics = makeExampleMetrics() 

1899 with ResourcePath.temporary_uri(suffix=".json") as temp: 

1900 source_refs = [DatasetRef(datasetType, {}, run=run)] 

1901 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

1902 dataset = FileDataset(path=temp, refs=source_refs) 

1903 self.source_butler.ingest(dataset, transfer="direct") 
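# transfer="direct" records the file's absolute URI in the datastore

# instead of copying the artifact under the datastore root.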

1904 

1905 self.target_butler.transfer_from( 

1906 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

1907 ) 

1908 

1909 uri = self.target_butler.getURI(dataset.refs[0]) 

1910 if transfer == "auto": 

1911 self.assertEqual(uri, temp) 

1912 else: 

1913 self.assertNotEqual(uri, temp) 

1914 

1915 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1916 """Test that a run can be transferred to another butler.""" 

1917 

1918 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1919 datasetTypeName = "random_data" 

1920 

1921 # The test will create 3 collections, and we will want to transfer

1922 # two of those three.

1923 runs = ["run1", "run2", "other"] 

1924 

1925 # Also want to use two different dataset types to ensure that 

1926 # grouping works. 

1927 datasetTypeNames = ["random_data", "random_data_2"] 

1928 

1929 # Create the run collections in the source butler. 

1930 for run in runs: 

1931 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1932 

1933 # Create dimensions in source butler. 

1934 n_exposures = 30 

1935 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1936 self.source_butler.registry.insertDimensionData( 

1937 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1938 ) 

1939 self.source_butler.registry.insertDimensionData( 

1940 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1941 ) 

1942 

1943 for i in range(n_exposures): 

1944 self.source_butler.registry.insertDimensionData( 

1945 "exposure", 

1946 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

1947 ) 

1948 

1949 # Create dataset types in the source butler. 

1950 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

1951 for datasetTypeName in datasetTypeNames: 

1952 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1953 self.source_butler.registry.registerDatasetType(datasetType) 

1954 

1955 # Write a dataset to an unrelated run -- this will ensure that

1956 # we are rewriting integer dataset IDs in the target if necessary.

1957 # This is not relevant for UUIDs.

1958 run = "distraction" 

1959 butler = Butler(butler=self.source_butler, run=run) 

1960 butler.put( 

1961 makeExampleMetrics(), 

1962 datasetTypeName, 

1963 exposure=1, 

1964 instrument="DummyCamComp", 

1965 physical_filter="d-r", 

1966 ) 

1967 

1968 # Write some example metrics to the source 

1969 butler = Butler(butler=self.source_butler) 

1970 

1971 # Set of DatasetRefs that should be in the list of refs to transfer 

1972 # but which will not be transferred. 

1973 deleted = set() 

1974 

1975 n_expected = 20 # Number of datasets expected to be transferred 

1976 source_refs = [] 

1977 for i in range(n_exposures): 

1978 # Put a third of the datasets into each collection; only retain

1979 # two thirds of them.

1980 index = i % 3 

1981 run = runs[index] 

1982 datasetTypeName = datasetTypeNames[i % 2] 

1983 

1984 metric_data = { 

1985 "summary": {"counter": i}, 

1986 "output": {"text": "metric"}, 

1987 "data": [2 * x for x in range(i)], 

1988 } 

1989 metric = MetricsExample(**metric_data) 

1990 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1991 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

1992 

1993 # Remove the datastore record using the low-level API.

1994 if purge: 

1995 # Remove records for a fraction. 

1996 if index == 1: 

1997 # For one of these delete the file as well. 

1998 # This allows the "missing" code to filter the 

1999 # file out. 

2000 # Access the individual datastores. 

2001 datastores = [] 

2002 if hasattr(butler.datastore, "datastores"): 

2003 datastores.extend(butler.datastore.datastores) 

2004 else: 

2005 datastores.append(butler.datastore) 

2006 

2007 if not deleted: 

2008 # For a chained datastore we need to remove

2009 # the files from each datastore in the chain.

2010 for datastore in datastores: 

2011 # The file might not be known to the datastore 

2012 # if constraints are used. 

2013 try: 

2014 primary, uris = datastore.getURIs(ref) 

2015 except FileNotFoundError: 

2016 continue 

2017 if primary: 

2018 if primary.scheme != "mem": 

2019 primary.remove() 

2020 for uri in uris.values(): 

2021 if uri.scheme != "mem": 

2022 uri.remove() 

2023 n_expected -= 1 

2024 deleted.add(ref) 

2025 

2026 # Remove the datastore record. 

2027 for datastore in datastores: 

2028 if hasattr(datastore, "removeStoredItemInfo"): 

2029 datastore.removeStoredItemInfo(ref) 

2030 

2031 if index < 2: 

2032 source_refs.append(ref) 

2033 if ref not in deleted: 

2034 new_metric = butler.get(ref.unresolved(), collections=run) 

2035 self.assertEqual(new_metric, metric) 

2036 

2037 # Create some bad dataset types to ensure we check for inconsistent 

2038 # definitions. 

2039 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2040 for datasetTypeName in datasetTypeNames: 

2041 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2042 self.target_butler.registry.registerDatasetType(datasetType) 

2043 with self.assertRaises(ConflictingDefinitionError) as cm: 

2044 self.target_butler.transfer_from(self.source_butler, source_refs) 

2045 self.assertIn("dataset type differs", str(cm.exception)) 

2046 

2047 # And remove the bad definitions. 

2048 for datasetTypeName in datasetTypeNames: 

2049 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2050 

2051 # Transfer without creating dataset types should fail. 

2052 with self.assertRaises(KeyError): 

2053 self.target_butler.transfer_from(self.source_butler, source_refs) 

2054 

2055 # Transfer without creating dimensions should fail. 

2056 with self.assertRaises(ConflictingDefinitionError) as cm: 

2057 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2058 self.assertIn("dimension", str(cm.exception)) 

2059 

2060 # The failed transfer above leaves registry in an inconsistent 

2061 # state because the run is created but then rolled back without 

2062 # the collection cache being cleared. For now force a refresh. 

2063 # Can remove with DM-35498. 

2064 self.target_butler.registry.refresh() 

2065 

2066 # Now transfer them to the second butler, including dimensions. 

2067 with self.assertLogs(level=logging.DEBUG) as cm: 

2068 transferred = self.target_butler.transfer_from( 

2069 self.source_butler, 

2070 source_refs, 

2071 register_dataset_types=True, 

2072 transfer_dimensions=True, 

2073 ) 

2074 self.assertEqual(len(transferred), n_expected) 

2075 log_output = ";".join(cm.output) 

2076 

2077 # A ChainedDatastore will use the in-memory datastore for mexists,

2078 # so we cannot rely on the mexists log message.

2079 self.assertIn("Number of datastore records found in source", log_output) 

2080 self.assertIn("Creating output run", log_output) 

2081 

2082 # Do the transfer twice to ensure that it will do nothing extra. 

2083 # Only do this if purge=True because it does not work for int 

2084 # dataset_id. 

2085 if purge: 

2086 # This should not need to register dataset types. 

2087 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2088 self.assertEqual(len(transferred), n_expected) 

2089 

2090 # Also do an explicit low-level transfer to trigger some 

2091 # edge cases. 

2092 with self.assertLogs(level=logging.DEBUG) as cm: 

2093 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2094 log_output = ";".join(cm.output) 

2095 self.assertIn("no file artifacts exist", log_output) 

2096 

2097 with self.assertRaises((TypeError, AttributeError)): 

2098 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2099 

2100 with self.assertRaises(ValueError): 

2101 self.target_butler.datastore.transfer_from( 

2102 self.source_butler.datastore, source_refs, transfer="split" 

2103 ) 

2104 

2105 # Now try to get the same refs from the new butler. 

2106 for ref in source_refs: 

2107 if ref not in deleted: 

2108 unresolved_ref = ref.unresolved() 

2109 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2110 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2111 self.assertEqual(new_metric, old_metric) 

2112 

2113 # Now remove the run2 collection and create a CHAINED collection

2114 # in its place. This should block the transfer.

2115 self.target_butler.removeRuns(["run2"], unstore=True) 

2116 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2117 with self.assertRaises(CollectionTypeError): 

2118 # Re-importing the run1 datasets can be problematic if they

2119 # use integer IDs, so filter those out.

2120 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2121 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2122 

2123 

2124class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2125 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2126 

2127 

2128if __name__ == "__main__": 

2129 unittest.main()