
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""


import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
from tempfile import gettempdir
from threading import Thread


try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls
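    # With this stub in place, applying ``@mock_s3`` to a test class is a
    # no-op when moto is unavailable; the affected tests are then expected to
    # be skipped elsewhere (e.g. by a ``skipIf(not boto3, ...)``-style guard)
    # rather than fail at import time.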



try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None


import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))



def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
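# The three positional arguments above populate, in order, the ``summary``,
# ``output`` and ``data`` attributes of MetricsExample that the component and
# slicing tests below read back.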



class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""


    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
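        # searchPaths prepends override directories to the config search path,
        # so values found under config/testConfigs (here the datastore records
        # table name "override_record") take precedence over the defaults used
        # by the basic butler.yaml.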



class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType


    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)
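        # Component dataset types use the ``<parent>.<component>`` naming
        # convention produced by componentTypeName (e.g. ``metric.summary``),
        # the same form used for direct component gets later in this file.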


    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType


    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the collections are
            # empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler


    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should still leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))



class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")


    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
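        # For reference, the repository index dumped above is just a mapping
        # of label to repository URI, e.g. in YAML form (illustrative values):
        #
        #     label: /path/to/repo/butler.yaml
        #     bad_label: s3://bucket/not_real.yaml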


    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")
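        # In short: the DatasetType carried by the ref (not the one stored in
        # the registry) decides the Python type returned, and get() matches
        # getDirect() in this respect.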


    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")
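        # Both data IDs resolve to the same artifact because one file was
        # ingested for two datasets; MultiDetectorFormatter is what lets each
        # ref read back only its own detector's portion of that file.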


        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
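        # At this point run1 holds ref1 and ref3, while run2 holds only ref2.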


        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        existence = butler.datastore.knows_these([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.knows(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)


    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying can still return the component
        # dataset types.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

1072 def testTransaction(self): 

1073 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1074 datasetTypeName = "test_metric" 

1075 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

1076 dimensionEntries = ( 

1077 ("instrument", {"instrument": "DummyCam"}), 

1078 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

1079 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}), 

1080 ) 

1081 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1082 metric = makeExampleMetrics() 

1083 dataId = {"instrument": "DummyCam", "visit": 42} 

1084 # Create and register a DatasetType 

1085 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1086 with self.assertRaises(TransactionTestError): 

1087 with butler.transaction(): 

1088 # Add needed Dimensions 

1089 for args in dimensionEntries: 

1090 butler.registry.insertDimensionData(*args) 

1091 # Store a dataset 

1092 ref = butler.put(metric, datasetTypeName, dataId) 

1093 self.assertIsInstance(ref, DatasetRef) 

1094 # Test getDirect 

1095 metricOut = butler.getDirect(ref) 

1096 self.assertEqual(metric, metricOut) 

1097 # Test get 

1098 metricOut = butler.get(datasetTypeName, dataId) 

1099 self.assertEqual(metric, metricOut) 

1100 # Check we can get components 

1101 self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric) 

1102 raise TransactionTestError("This should roll back the entire transaction") 

1103 with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"): 

1104 butler.registry.expandDataId(dataId) 

1105 # Should raise LookupError for missing data ID value 

1106 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

1107 butler.get(datasetTypeName, dataId) 

1108 # Also check explicitly if Dataset entry is missing 

1109 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

1110 # Direct retrieval should not find the file in the Datastore 

1111 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

1112 butler.getDirect(ref) 
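        # i.e. the rollback removed the dimension records, the registry entry
        # and the datastore artifact together, as one atomic transaction.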


    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)


    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)



class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()


    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")
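        # FileTemplate fields such as ``{visit.name}`` are resolved from the
        # dimension records attached to the ref; a trailing ``:?`` marks the
        # field as optional, so the misspelled ``visit.namex`` above is
        # dropped with an INFO log instead of raising (contrast with below).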


        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

1311 

1312 def testImportExport(self): 

1313 # Run put/get tests just to create and populate a repo. 

1314 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1315 self.runImportExportTest(storageClass) 

1316 

1317 @unittest.expectedFailure 

1318 def testImportExportVirtualComposite(self): 

1319 # Run put/get tests just to create and populate a repo. 

1320 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1321 self.runImportExportTest(storageClass) 

1322 

1323 def runImportExportTest(self, storageClass): 

1324 """This test does an export to a temp directory and an import back 

1325 into a new temp directory repo. It does not assume a posix datastore""" 

1326 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1327 # Test that the repo actually has at least one dataset. 

1328 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1329 self.assertGreater(len(datasets), 0) 

1330 # Add a DimensionRecord that's unused by those datasets. 

1331 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1332 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1333 # Export and then import datasets. 

1334 with safeTestTempDir(TESTDIR) as exportDir: 

1335 exportFile = os.path.join(exportDir, "exports.yaml") 

1336 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1337 export.saveDatasets(datasets) 

1338 # Export the same datasets again. This should quietly do 

1339 # nothing because of internal deduplication, and it shouldn't 

1340 # complain about being asked to export the "htm7" elements even 

1341 # though there aren't any in these datasets or in the database. 

1342 export.saveDatasets(datasets, elements=["htm7"]) 

1343 # Save one of the data IDs again; this should be harmless 

1344 # because of internal deduplication. 

1345 export.saveDataIds([datasets[0].dataId]) 

1346 # Save some dimension records directly. 

1347 export.saveDimensionData("skymap", [skymapRecord]) 

1348 self.assertTrue(os.path.exists(exportFile)) 

1349 with safeTestTempDir(TESTDIR) as importDir: 

1350 # We always want this to be a local posix butler 

1351 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1352 # Calling script.butlerImport tests the implementation of the 

1353 # butler command line interface "import" subcommand. Functions 

1354 # in the script folder are generally considered protected and 

1355 # should not be used as public api. 

1356 with open(exportFile, "r") as f: 

1357 script.butlerImport( 

1358 importDir, 

1359 export_file=f, 

1360 directory=exportDir, 

1361 transfer="auto", 

1362 skip_dimensions=None, 

1363 reuse_ids=False, 

1364 ) 

1365 importButler = Butler(importDir, run=self.default_run) 

1366 for ref in datasets: 

1367 with self.subTest(ref=ref): 

1368 # Test for existence by passing in the DatasetType and 

1369 # data ID separately, to avoid lookup by dataset_id. 

1370 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

1371 self.assertEqual( 

1372 list(importButler.registry.queryDimensionRecords("skymap")), 

1373 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)], 

1374 ) 

1375 

1376 def testRemoveRuns(self): 

1377 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1378 butler = Butler(self.tmpConfigFile, writeable=True) 

1379 # Load registry data with dimensions to hang datasets off of. 

1380 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1381 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1382 # Add two RUN-type collections. 

1383 run1 = "run1" 

1384 butler.registry.registerRun(run1) 

1385 run2 = "run2" 

1386 butler.registry.registerRun(run2) 

1387 # Put a dataset in each run. 

1388 metric = makeExampleMetrics() 

1389 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1390 datasetType = self.addDatasetType( 

1391 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1392 ) 

1393 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1394 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1395 uri1 = butler.getURI(ref1, collections=[run1]) 

1396 uri2 = butler.getURI(ref2, collections=[run2]) 

1397 # Remove from both runs with different values for unstore. 

1398 butler.removeRuns([run1], unstore=True) 

1399 butler.removeRuns([run2], unstore=False) 

1400 # Should be nothing in registry for either one, and datastore should 

1401 # not think either exists. 

1402 with self.assertRaises(MissingCollectionError): 

1403 butler.registry.getCollectionType(run1) 

1404 with self.assertRaises(MissingCollectionError): 

1405 butler.registry.getCollectionType(run2) 

1406 self.assertFalse(butler.datastore.exists(ref1)) 

1407 self.assertFalse(butler.datastore.exists(ref2)) 

1408 # The ref we unstored should be gone according to the URI, but the 

1409 # one we forgot should still be around. 

1410 self.assertFalse(uri1.exists()) 

1411 self.assertTrue(uri2.exists()) 

1412 
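# --- Editor's illustration (hedged): a toy model of the unstore semantics the
# assertions above check, using a plain dict as the "registry" and real temp
# files as the "datastore". remove_run() is a hypothetical stand-in, not the
# Butler API: removing a run always forgets its entries, but only unstore=True
# deletes the underlying artifact.

import os
import tempfile

def remove_run(registry: dict, run: str, *, unstore: bool) -> None:
    for path in registry.pop(run, []):
        if unstore and os.path.exists(path):
            os.remove(path)

paths = []
for _ in range(2):
    with tempfile.NamedTemporaryFile(delete=False) as f:
        paths.append(f.name)
registry = {"run1": [paths[0]], "run2": [paths[1]]}
remove_run(registry, "run1", unstore=True)   # file deleted
remove_run(registry, "run2", unstore=False)  # file survives on disk
print(os.path.exists(paths[0]), os.path.exists(paths[1]))  # False True
os.remove(paths[1])  # clean up the survivor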

1413 

1414class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1415 """PosixDatastore specialization of a butler""" 

1416 

1417 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1418 fullConfigKey = ".datastore.formatters" 

1419 validationCanFail = True 

1420 datastoreStr = ["/tmp"] 

1421 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1422 registryStr = "/gen3.sqlite3" 

1423 

1424 def testPathConstructor(self): 

1425 """Independent test of constructor using PathLike.""" 

1426 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1427 self.assertIsInstance(butler, Butler) 

1428 

1429 # And again with a Path object with the butler yaml 

1430 path = pathlib.Path(self.tmpConfigFile) 

1431 butler = Butler(path, writeable=False) 

1432 self.assertIsInstance(butler, Butler) 

1433 

1434 # And again with a Path object without the butler yaml 

1435 # (making sure we skip it if the tmp config doesn't end 

1436 # in butler.yaml -- which is the case for a subclass) 

1437 if self.tmpConfigFile.endswith("butler.yaml"): 

1438 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1439 butler = Butler(path, writeable=False) 

1440 self.assertIsInstance(butler, Butler) 

1441 

1442 def testExportTransferCopy(self): 

1443 """Test local export using all transfer modes""" 

1444 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1445 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1446 # Test that the repo actually has at least one dataset. 

1447 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1448 self.assertGreater(len(datasets), 0) 

1449 uris = [exportButler.getURI(d) for d in datasets] 

1450 datastoreRoot = exportButler.datastore.root 

1451 

1452 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1453 

1454 for path in pathsInStore: 

1455 # Assume local file system 

1456 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1457 

1458 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1459 with safeTestTempDir(TESTDIR) as exportDir: 

1460 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1461 export.saveDatasets(datasets) 

1462 for path in pathsInStore: 

1463 self.assertTrue( 

1464 self.checkFileExists(exportDir, path), 

1465 f"Check that mode {transfer} exported files", 

1466 ) 

1467 
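# --- Editor's illustration (hedged): the difference between two of the export
# transfer modes looped over above, shown with stdlib calls only; "copy"
# duplicates the artifact while "symlink" merely points back at the original.

import os
import shutil
import tempfile

src_dir = tempfile.mkdtemp()
dst_dir = tempfile.mkdtemp()
src = os.path.join(src_dir, "data.txt")
with open(src, "w") as f:
    f.write("payload")

shutil.copy2(src, os.path.join(dst_dir, "copy.txt"))    # like transfer="copy"
os.symlink(src, os.path.join(dst_dir, "symlink.txt"))   # like transfer="symlink"
print(sorted(os.listdir(dst_dir)))  # ['copy.txt', 'symlink.txt']
shutil.rmtree(src_dir)
shutil.rmtree(dst_dir)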

1468 def testPruneDatasets(self): 

1469 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1470 butler = Butler(self.tmpConfigFile, writeable=True) 

1471 # Load registry data with dimensions to hang datasets off of. 

1472 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1473 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1474 # Add some RUN-type collections. 

1475 run1 = "run1" 

1476 butler.registry.registerRun(run1) 

1477 run2 = "run2" 

1478 butler.registry.registerRun(run2) 

1479 # Put some datasets. ref1 and ref2 have the same data ID and are in 

1480 # different runs; ref3 has a different data ID. 

1481 metric = makeExampleMetrics() 

1482 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1483 datasetType = self.addDatasetType( 

1484 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1485 ) 

1486 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1487 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1488 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1489 

1490 # Simple prune. 

1491 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1492 with self.assertRaises(LookupError): 

1493 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1494 

1495 # Put data back. 

1496 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1497 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1498 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1499 

1500 # Check that in normal mode, deleting the record first means that 

1501 # emptying the trash will not touch the file. 

1502 uri1 = butler.datastore.getURI(ref1) 

1503 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1504 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1505 butler.datastore.trash(ref1) 

1506 butler.datastore.emptyTrash() 

1507 self.assertTrue(uri1.exists()) 

1508 uri1.remove() # Clean it up. 

1509 

1510 # Simulate execution butler setup by deleting the datastore 

1511 # record but keeping the file around and trusting. 

1512 butler.datastore.trustGetRequest = True 

1513 uri2 = butler.datastore.getURI(ref2) 

1514 uri3 = butler.datastore.getURI(ref3) 

1515 self.assertTrue(uri2.exists()) 

1516 self.assertTrue(uri3.exists()) 

1517 

1518 # Remove the datastore record. 

1519 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1520 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1521 self.assertTrue(uri2.exists()) 

1522 butler.datastore.trash([ref2, ref3]) 

1523 # Immediate removal for ref2 file 

1524 self.assertFalse(uri2.exists()) 

1525 # But ref3 has to wait for the trash to be emptied. 

1526 self.assertTrue(uri3.exists()) 

1527 butler.datastore.emptyTrash() 

1528 self.assertFalse(uri3.exists()) 

1529 

1530 # Clear out the datasets from registry. 

1531 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1532 
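# --- Editor's illustration (hedged): the two-phase delete pattern the
# assertions above exercise, as a toy. trash_file() only records intent;
# empty_trash() performs the actual removal, so a trashed file still exists on
# disk until the trash is emptied. These helpers are hypothetical, not the
# datastore API.

import os
import tempfile

_trash: set = set()

def trash_file(path: str) -> None:
    _trash.add(path)  # phase 1: mark for deletion, keep the bytes

def empty_trash() -> None:
    while _trash:  # phase 2: actually delete
        os.remove(_trash.pop())

with tempfile.NamedTemporaryFile(delete=False) as f:
    name = f.name
trash_file(name)
print(os.path.exists(name))  # True: still present after "trash"
empty_trash()
print(os.path.exists(name))  # False: gone only after the empty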

1533 def testPytypeCoercion(self): 

1534 """Test python type coercion on Butler.get and put.""" 

1535 

1536 # Store some data with the normal example storage class. 

1537 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1538 datasetTypeName = "test_metric" 

1539 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1540 

1541 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1542 metric = butler.get(datasetTypeName, dataId=dataId) 

1543 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1544 

1545 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1546 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1547 

1548 # Now need to hack the registry dataset type definition. 

1549 # There is no API for this. 

1550 manager = butler.registry._managers.datasets 

1551 manager._db.update( 

1552 manager._static.dataset_type, 

1553 {"name": datasetTypeName}, 

1554 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1555 ) 

1556 

1557 # Force reset of dataset type cache 

1558 butler.registry.refresh() 

1559 

1560 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1561 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1562 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1563 

1564 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1565 self.assertNotEqual(type(metric_model), type(metric)) 

1566 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1567 

1568 # Put the model and read it back to show that everything now 

1569 # works as normal. 

1570 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1571 metric_model_new = butler.get(metric_ref) 

1572 self.assertEqual(metric_model_new, metric_model) 

1573 

1574 # Hack the storage class again to something that will fail on the 

1575 # get with no conversion class. 

1576 manager._db.update( 

1577 manager._static.dataset_type, 

1578 {"name": datasetTypeName}, 

1579 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1580 ) 

1581 butler.registry.refresh() 

1582 

1583 with self.assertRaises(ValueError): 

1584 butler.get(datasetTypeName, dataId=dataId) 

1585 
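# --- Editor's illustration (hedged): the storage-class coercion idea this test
# exercises, as a pure-Python analogy; the real conversion machinery lives in
# daf_butler's StorageClass, and Metric/converters here are hypothetical names.

from dataclasses import dataclass

@dataclass
class Metric:
    value: int

# Registered conversions: (source type name, target name) -> converter.
converters = {("Metric", "dict"): lambda m: {"value": m.value}}

def coerce(obj, target: str):
    source = type(obj).__name__
    if source == target:
        return obj
    try:
        return converters[(source, target)](obj)
    except KeyError:
        # Mirrors the final assertRaises(ValueError) above: no converter known.
        raise ValueError(f"no conversion from {source} to {target}") from None

print(coerce(Metric(3), "dict"))  # {'value': 3}
try:
    coerce(Metric(3), "list")
except ValueError as exc:
    print(exc)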

1586 

1587@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1588class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1589 """PosixDatastore specialization of a butler using Postgres""" 

1590 

1591 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1592 fullConfigKey = ".datastore.formatters" 

1593 validationCanFail = True 

1594 datastoreStr = ["/tmp"] 

1595 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1596 registryStr = "PostgreSQL@test" 

1597 

1598 @staticmethod 

1599 def _handler(postgresql): 

1600 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1601 with engine.begin() as connection: 

1602 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1603 

1604 @classmethod 

1605 def setUpClass(cls): 

1606 # Create the postgres test server. 

1607 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1608 cache_initialized_db=True, on_initialized=cls._handler 

1609 ) 

1610 super().setUpClass() 

1611 

1612 @classmethod 

1613 def tearDownClass(cls): 

1614 # Clean up any lingering SQLAlchemy engines/connections 

1615 # so they're closed before we shut down the server. 

1616 gc.collect() 

1617 cls.postgresql.clear_cache() 

1618 super().tearDownClass() 

1619 

1620 def setUp(self): 

1621 self.server = self.postgresql() 

1622 

1623 # Need to add a registry section to the config. 

1624 self._temp_config = False 

1625 config = Config(self.configFile) 

1626 config["registry", "db"] = self.server.url() 

1627 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1628 config.dump(fh) 

1629 self.configFile = fh.name 

1630 self._temp_config = True 

1631 super().setUp() 

1632 
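# --- Editor's illustration (hedged): the per-test temp-config pattern used in
# setUp, reduced to plain PyYAML as an analogy for daf_butler's Config: load a
# base config, point the registry at the per-test server URL, and write it to
# a NamedTemporaryFile whose name becomes the new config path. The URL is
# illustrative only.

import tempfile

import yaml

base_config = {"registry": {"db": "sqlite://"}}
base_config["registry"]["db"] = "postgresql://localhost:5432/test"  # per-test
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
    yaml.safe_dump(base_config, fh)
    config_file = fh.name
print(config_file)  # hand this path to the code under test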

1633 def tearDown(self): 

1634 self.server.stop() 

1635 if self._temp_config and os.path.exists(self.configFile): 

1636 os.remove(self.configFile) 

1637 super().tearDown() 

1638 

1639 def testMakeRepo(self): 

1640 # The base class test assumes that it's using sqlite and that 

1641 # the config file is acceptable to sqlite. 

1642 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1643 

1644 

1645class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1646 """InMemoryDatastore specialization of a butler""" 

1647 

1648 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1649 fullConfigKey = None 

1650 useTempRoot = False 

1651 validationCanFail = False 

1652 datastoreStr = ["datastore='InMemory"] 

1653 datastoreName = ["InMemoryDatastore@"] 

1654 registryStr = "/gen3.sqlite3" 

1655 

1656 def testIngest(self): 

1657 pass 

1658 

1659 

1660class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1661 """PosixDatastore specialization""" 

1662 

1663 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1664 fullConfigKey = ".datastore.datastores.1.formatters" 

1665 validationCanFail = True 

1666 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1667 datastoreName = [ 

1668 "InMemoryDatastore@", 

1669 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1670 "SecondDatastore", 

1671 ] 

1672 registryStr = "/gen3.sqlite3" 

1673 

1674 

1675class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1676 """Test that a yaml file in one location can refer to a root in another.""" 

1677 

1678 datastoreStr = ["dir1"] 

1679 # Disable the makeRepo test since we are deliberately not using 

1680 # butler.yaml as the config name. 

1681 fullConfigKey = None 

1682 

1683 def setUp(self): 

1684 self.root = makeTestTempDir(TESTDIR) 

1685 

1686 # Make a new repository in one place 

1687 self.dir1 = os.path.join(self.root, "dir1") 

1688 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1689 

1690 # Move the yaml file to a different place and add a "root" 

1691 self.dir2 = os.path.join(self.root, "dir2") 

1692 os.makedirs(self.dir2, exist_ok=True) 

1693 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1694 config = Config(configFile1) 

1695 config["root"] = self.dir1 

1696 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1697 config.dumpToUri(configFile2) 

1698 os.remove(configFile1) 

1699 self.tmpConfigFile = configFile2 

1700 

1701 def testFileLocations(self): 

1702 self.assertNotEqual(self.dir1, self.dir2) 

1703 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1704 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1705 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1706 

1707 

1708class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1709 """Test that a config file created by makeRepo outside of repo works.""" 

1710 

1711 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1712 

1713 def setUp(self): 

1714 self.root = makeTestTempDir(TESTDIR) 

1715 self.root2 = makeTestTempDir(TESTDIR) 

1716 

1717 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1718 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1719 

1720 def tearDown(self): 

1721 if os.path.exists(self.root2): 

1722 shutil.rmtree(self.root2, ignore_errors=True) 

1723 super().tearDown() 

1724 

1725 def testConfigExistence(self): 

1726 c = Config(self.tmpConfigFile) 

1727 uri_config = ResourcePath(c["root"]) 

1728 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1729 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1730 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1731 

1732 def testPutGet(self): 

1733 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1734 self.runPutGetTest(storageClass, "test_metric") 

1735 

1736 

1737class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1738 """Test that a config file created by makeRepo outside of repo works.""" 

1739 

1740 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1741 

1742 def setUp(self): 

1743 self.root = makeTestTempDir(TESTDIR) 

1744 self.root2 = makeTestTempDir(TESTDIR) 

1745 

1746 self.tmpConfigFile = self.root2 

1747 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1748 

1749 def testConfigExistence(self): 

1750 # Append the yaml file name, else the Config constructor does not 

1751 # know the file type. 

1752 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1753 super().testConfigExistence() 

1754 

1755 

1756class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1757 """Test that a config file created by makeRepo outside of repo works.""" 

1758 

1759 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1760 

1761 def setUp(self): 

1762 self.root = makeTestTempDir(TESTDIR) 

1763 self.root2 = makeTestTempDir(TESTDIR) 

1764 

1765 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1766 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1767 

1768 

1769@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1770class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1771 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1772 a local in-memory SqlRegistry. 

1773 """ 

1774 

1775 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1776 fullConfigKey = None 

1777 validationCanFail = True 

1778 

1779 bucketName = "anybucketname" 

1780 """Name of the Bucket that will be used in the tests. The name is read from 

1781 the config file used with the tests during set-up. 

1782 """ 

1783 

1784 root = "butlerRoot/" 

1785 """Root repository directory expected to be used in case useTempRoot=False. 

1786 Otherwise the root is set to a randomly generated 20-character string 

1787 during set-up. 

1788 """ 

1789 

1790 datastoreStr = [f"datastore={root}"] 

1791 """Contains all expected root locations in a format expected to be 

1792 returned by Butler stringification. 

1793 """ 

1794 

1795 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1796 """The expected format of the S3 Datastore string.""" 

1797 

1798 registryStr = "/gen3.sqlite3" 

1799 """Expected format of the Registry string.""" 

1800 

1801 mock_s3 = mock_s3() 

1802 """The mocked s3 interface from moto.""" 

1803 

1804 def genRoot(self): 

1805 """Returns a random string of len 20 to serve as a root 

1806 name for the temporary bucket repo. 

1807 

1808 This is equivalent to tempfile.mkdtemp as this is what self.root 

1809 becomes when useTempRoot is True. 

1810 """ 

1811 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1812 return rndstr + "/" 

1813 

1814 def setUp(self): 

1815 config = Config(self.configFile) 

1816 uri = ResourcePath(config[".datastore.datastore.root"]) 

1817 self.bucketName = uri.netloc 

1818 

1819 # Enable S3 mocking of tests. 

1820 self.mock_s3.start() 

1821 

1822 # Set up some fake credentials if they do not exist. 

1823 self.usingDummyCredentials = setAwsEnvCredentials() 

1824 

1825 if self.useTempRoot: 

1826 self.root = self.genRoot() 

1827 rooturi = f"s3://{self.bucketName}/{self.root}" 

1828 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1829 

1830 # Need a local folder to store the registry database. 

1831 self.reg_dir = makeTestTempDir(TESTDIR) 

1832 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1833 

1834 # Moto needs to know that we expect the bucket to exist 

1835 # (its name used to be the class attribute bucketName). 

1836 s3 = boto3.resource("s3") 

1837 s3.create_bucket(Bucket=self.bucketName) 

1838 

1839 self.datastoreStr = f"datastore={self.root}" 

1840 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1841 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1842 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1843 
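# --- Editor's illustration (hedged): the moto pattern this setUp relies on,
# reduced to a standalone function; start the S3 mock, create the bucket the
# code under test expects, and everything boto3 does stays in memory. Assumes
# moto and boto3 are installed; the bucket name and region are illustrative.

import boto3
from moto import mock_s3

@mock_s3
def demo_bucket() -> list:
    s3 = boto3.resource("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="anybucketname")  # moto must be told it exists
    return [b.name for b in s3.buckets.all()]

print(demo_bucket())  # ['anybucketname']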

1844 def tearDown(self): 

1845 s3 = boto3.resource("s3") 

1846 bucket = s3.Bucket(self.bucketName) 

1847 try: 

1848 bucket.objects.all().delete() 

1849 except botocore.exceptions.ClientError as e: 

1850 if e.response["Error"]["Code"] == "404": 

1851 # the key was not reachable - pass 

1852 pass 

1853 else: 

1854 raise 

1855 

1856 bucket = s3.Bucket(self.bucketName) 

1857 bucket.delete() 

1858 

1859 # Stop the S3 mock. 

1860 self.mock_s3.stop() 

1861 

1862 # unset any potentially set dummy credentials 

1863 if self.usingDummyCredentials: 

1864 unsetAwsEnvCredentials() 

1865 

1866 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1867 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1868 

1869 if self.useTempRoot and os.path.exists(self.root): 

1870 shutil.rmtree(self.root, ignore_errors=True) 

1871 

1872 super().tearDown() 

1873 

1874 

1875@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1876class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1877 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1878 a local in-memory SqlRegistry. 

1879 """ 

1880 

1881 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1882 fullConfigKey = None 

1883 validationCanFail = True 

1884 

1885 serverName = "localhost" 

1886 """Name of the server that will be used in the tests. 

1887 """ 

1888 

1889 portNumber = 8080 

1890 """Port on which the webdav server listens. Automatically chosen 

1891 at setUpClass via the _getfreeport() method. 

1892 """ 

1893 

1894 root = "butlerRoot/" 

1895 """Root repository directory expected to be used in case useTempRoot=False. 

1896 Otherwise the root is set to a randomly generated 20-character string 

1897 during set-up. 

1898 """ 

1899 

1900 datastoreStr = [f"datastore={root}"] 

1901 """Contains all expected root locations in a format expected to be 

1902 returned by Butler stringification. 

1903 """ 

1904 

1905 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1906 """The expected format of the WebdavDatastore string.""" 

1907 

1908 registryStr = "/gen3.sqlite3" 

1909 """Expected format of the Registry string.""" 

1910 

1911 serverThread = None 

1912 """Thread in which the local webdav server will run""" 

1913 

1914 stopWebdavServer = False 

1915 """This flag will cause the webdav server to 

1916 gracefully shut down when True. 

1917 """ 

1918 

1919 def genRoot(self): 

1920 """Returns a random string of len 20 to serve as a root 

1921 name for the temporary bucket repo. 

1922 

1923 This is equivalent to tempfile.mkdtemp as this is what self.root 

1924 becomes when useTempRoot is True. 

1925 """ 

1926 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1927 return rndstr + "/" 

1928 

1929 @classmethod 

1930 def setUpClass(cls): 

1931 # Do the same as inherited class 

1932 cls.storageClassFactory = StorageClassFactory() 

1933 cls.storageClassFactory.addFromConfig(cls.configFile) 

1934 

1935 cls.portNumber = cls._getfreeport() 

1936 # Run a local webdav server on which tests will be run 

1937 cls.serverThread = Thread( 

1938 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1939 ) 

1940 cls.serverThread.start() 

1941 # Wait for it to start 

1942 time.sleep(3) 

1943 

1944 @classmethod 

1945 def tearDownClass(cls): 

1946 # Ask for graceful shut down of the webdav server 

1947 cls.stopWebdavServer = True 

1948 # Wait for the thread to exit 

1949 cls.serverThread.join() 

1950 super().tearDownClass() 

1951 

1952 def setUp(self): 

1953 config = Config(self.configFile) 

1954 

1955 if self.useTempRoot: 

1956 self.root = self.genRoot() 

1957 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1958 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1959 

1961 # Need a local folder to store the registry database. 

1961 self.reg_dir = makeTestTempDir(TESTDIR) 

1962 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1963 

1964 self.datastoreStr = f"datastore={self.root}" 

1965 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1966 

1967 if not _is_webdav_endpoint(self.rooturi): 

1968 raise OSError("Webdav server not running properly: cannot run tests.") 

1969 

1970 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1971 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1972 

1973 def tearDown(self): 

1974 # Clear temporary directory 

1975 ResourcePath(self.rooturi).remove() 

1976 ResourcePath(self.rooturi).session.close() 

1977 

1978 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1979 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1980 

1981 if self.useTempRoot and os.path.exists(self.root): 

1982 shutil.rmtree(self.root, ignore_errors=True) 

1983 

1984 super().tearDown() 

1985 

1986 def _serveWebdav(self, port: int, stopWebdavServer): 

1987 """Starts a local webdav-compatible HTTP server, 

1988 Listening on http://localhost:port 

1989 This server only runs when this test class is instantiated, 

1990 and then shuts down. Must be started is a separate thread. 

1991 

1992 Parameters 

1993 ---------- 

1994 port : `int` 

1995 The port number on which the server should listen 

1996 """ 

1997 root_path = gettempdir() 

1998 

1999 config = { 

2000 "host": "0.0.0.0", 

2001 "port": port, 

2002 "provider_mapping": {"/": root_path}, 

2003 "http_authenticator": {"domain_controller": None}, 

2004 "simple_dc": {"user_mapping": {"*": True}}, 

2005 "verbose": 0, 

2006 } 

2007 app = WsgiDAVApp(config) 

2008 

2009 server_args = { 

2010 "bind_addr": (config["host"], config["port"]), 

2011 "wsgi_app": app, 

2012 } 

2013 server = wsgi.Server(**server_args) 

2014 server.prepare() 

2015 

2016 try: 

2017 # Start the actual server in a separate thread 

2018 t = Thread(target=server.serve, daemon=True) 

2019 t.start() 

2020 # watch stopWebdavServer, and gracefully 

2021 # shut down the server when True 

2022 while True: 

2023 if stopWebdavServer(): 

2024 break 

2025 time.sleep(1) 

2026 except KeyboardInterrupt: 

2027 print("Caught Ctrl-C, shutting down...") 

2028 finally: 

2029 server.stop() 

2030 t.join() 

2031 
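# --- Editor's illustration (hedged): the stop-flag polling pattern that
# _serveWebdav uses, stripped of the webdav server; a worker thread polls a
# callable until it returns True, which lets the owner request a graceful
# shutdown without killing the thread.

import threading
import time

stop_requested = False

def worker(should_stop) -> None:
    while not should_stop():
        time.sleep(0.05)  # stand-in for serving requests

t = threading.Thread(target=worker, args=(lambda: stop_requested,), daemon=True)
t.start()
stop_requested = True  # ask for graceful shutdown
t.join()
print("worker exited cleanly")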

2032 def _getfreeport(): 

2033 """ 

2034 Determines a free port using sockets. 

2035 """ 

2036 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

2037 free_socket.bind(("127.0.0.1", 0)) 

2038 free_socket.listen() 

2039 port = free_socket.getsockname()[1] 

2040 free_socket.close() 

2041 return port 

2042 
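# --- Editor's illustration (hedged): a variant of the port probe above using
# a context manager so the socket is always closed; same bind-to-port-0 trick,
# with the same caveat that another process could claim the port between
# probing and use.

import socket

def get_free_port() -> int:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]

print(get_free_port())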

2043 

2044class PosixDatastoreTransfers(unittest.TestCase): 

2045 """Test data transfers between butlers. 

2046 

2047 Test for different managers. UUID to UUID and integer to integer are 

2048 tested. UUID to integer is not supported since we do not currently 

2049 want to allow that. Integer to UUID is supported with the caveat 

2050 that UUID4 will be generated and this will be incorrect for raw 

2051 dataset types. The test ignores that. 

2052 """ 

2053 
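# --- Editor's illustration (hedged): a standalone sketch of the two ID
# flavours the docstring above contrasts. uuid4 is random (fine for most
# datasets but wrong for reproducible "raw" IDs), while uuid5 derives
# deterministically from the data ID; the namespace and data-ID string below
# are illustrative only.

import uuid

NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000")  # hypothetical
data_id = "instrument=DummyCamComp,exposure=1"

random_id = uuid.uuid4()                    # different on every call
stable_id = uuid.uuid5(NAMESPACE, data_id)  # same for the same data ID
assert stable_id == uuid.uuid5(NAMESPACE, data_id)
print(random_id, stable_id)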

2054 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2055 

2056 @classmethod 

2057 def setUpClass(cls): 

2058 cls.storageClassFactory = StorageClassFactory() 

2059 cls.storageClassFactory.addFromConfig(cls.configFile) 

2060 

2061 def setUp(self): 

2062 self.root = makeTestTempDir(TESTDIR) 

2063 self.config = Config(self.configFile) 

2064 

2065 def tearDown(self): 

2066 removeTestTempDir(self.root) 

2067 

2068 def create_butler(self, manager, label): 

2069 config = Config(self.configFile) 

2070 config["registry", "managers", "datasets"] = manager 

2071 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2072 

2073 def create_butlers(self, manager1, manager2): 

2074 self.source_butler = self.create_butler(manager1, "1") 

2075 self.target_butler = self.create_butler(manager2, "2") 

2076 

2077 def testTransferUuidToUuid(self): 

2078 self.create_butlers( 

2079 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2080 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2081 ) 

2082 # Setting id_gen_map should have no effect here 

2083 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2084 

2085 def testTransferMissing(self): 

2086 """Test transfers where datastore records are missing. 

2087 

2088 This is how execution butler works. 

2089 """ 

2090 self.create_butlers( 

2091 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2092 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2093 ) 

2094 

2095 # Configure the source butler to allow trust. 

2096 self.source_butler.datastore.trustGetRequest = True 

2097 

2098 self.assertButlerTransfers(purge=True) 

2099 

2100 def testTransferMissingDisassembly(self): 

2101 """Test transfers where datastore records are missing. 

2102 

2103 This is how execution butler works. 

2104 """ 

2105 self.create_butlers( 

2106 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2107 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2108 ) 

2109 

2110 # Configure the source butler to allow trust. 

2111 self.source_butler.datastore.trustGetRequest = True 

2112 

2113 # Test disassembly. 

2114 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2115 

2116 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2117 """Test that a run can be transferred to another butler.""" 

2118 

2119 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2120 datasetTypeName = "random_data" 

2121 

2122 # The test will create three collections, and we will want to 

2123 # transfer two of those three. 

2124 runs = ["run1", "run2", "other"] 

2125 

2126 # Also want to use two different dataset types to ensure that 

2127 # grouping works. 

2128 datasetTypeNames = ["random_data", "random_data_2"] 

2129 

2130 # Create the run collections in the source butler. 

2131 for run in runs: 

2132 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2133 

2134 # Create dimensions in source butler. 

2135 n_exposures = 30 

2136 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2137 self.source_butler.registry.insertDimensionData( 

2138 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2139 ) 

2140 self.source_butler.registry.insertDimensionData( 

2141 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2142 ) 

2143 

2144 for i in range(n_exposures): 

2145 self.source_butler.registry.insertDimensionData( 

2146 "exposure", 

2147 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2148 ) 

2149 

2150 # Create dataset types in the source butler. 

2151 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

2152 for datasetTypeName in datasetTypeNames: 

2153 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2154 self.source_butler.registry.registerDatasetType(datasetType) 

2155 

2156 # Write a dataset to an unrelated run -- this will ensure that 

2157 # we are rewriting integer dataset ids in the target if necessary. 

2158 # Will not be relevant for UUID. 

2159 run = "distraction" 

2160 butler = Butler(butler=self.source_butler, run=run) 

2161 butler.put( 

2162 makeExampleMetrics(), 

2163 datasetTypeName, 

2164 exposure=1, 

2165 instrument="DummyCamComp", 

2166 physical_filter="d-r", 

2167 ) 

2168 

2169 # Write some example metrics to the source 

2170 butler = Butler(butler=self.source_butler) 

2171 

2172 # Set of DatasetRefs that should be in the list of refs to transfer 

2173 # but which will not be transferred. 

2174 deleted = set() 

2175 

2176 n_expected = 20 # Number of datasets expected to be transferred 

2177 source_refs = [] 

2178 for i in range(n_exposures): 

2179 # Put a third of datasets into each collection, only retain 

2180 # two thirds. 

2181 index = i % 3 

2182 run = runs[index] 

2183 datasetTypeName = datasetTypeNames[i % 2] 

2184 

2185 metric_data = { 

2186 "summary": {"counter": i}, 

2187 "output": {"text": "metric"}, 

2188 "data": [2 * x for x in range(i)], 

2189 } 

2190 metric = MetricsExample(**metric_data) 

2191 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2192 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2193 

2194 # Remove the datastore record using low-level API 

2195 if purge: 

2196 # Remove records for a fraction. 

2197 if index == 1: 

2198 # For one of these delete the file as well. 

2199 # This allows the "missing" code to filter the 

2200 # file out. 

2201 if not deleted: 

2202 primary, uris = butler.datastore.getURIs(ref) 

2203 if primary: 

2204 primary.remove() 

2205 for uri in uris.values(): 

2206 uri.remove() 

2207 n_expected -= 1 

2208 deleted.add(ref) 

2209 

2210 # Remove the datastore record. 

2211 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2212 

2213 if index < 2: 

2214 source_refs.append(ref) 

2215 if ref not in deleted: 

2216 new_metric = butler.get(ref.unresolved(), collections=run) 

2217 self.assertEqual(new_metric, metric) 

2218 

2219 # Create some bad dataset types to ensure we check for inconsistent 

2220 # definitions. 

2221 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2222 for datasetTypeName in datasetTypeNames: 

2223 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2224 self.target_butler.registry.registerDatasetType(datasetType) 

2225 with self.assertRaises(ConflictingDefinitionError) as cm: 

2226 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2227 self.assertIn("dataset type differs", str(cm.exception)) 

2228 

2229 # And remove the bad definitions. 

2230 for datasetTypeName in datasetTypeNames: 

2231 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2232 

2233 # Transfer without creating dataset types should fail. 

2234 with self.assertRaises(KeyError): 

2235 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2236 

2237 # Transfer without creating dimensions should fail. 

2238 with self.assertRaises(ConflictingDefinitionError) as cm: 

2239 self.target_butler.transfer_from( 

2240 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2241 ) 

2242 self.assertIn("dimension", str(cm.exception)) 

2243 

2244 # The failed transfer above leaves registry in an inconsistent 

2245 # state because the run is created but then rolled back without 

2246 # the collection cache being cleared. For now force a refresh. 

2247 # Can remove with DM-35498. 

2248 self.target_butler.registry.refresh() 

2249 

2250 # Now transfer them to the second butler, including dimensions. 

2251 with self.assertLogs(level=logging.DEBUG) as cm: 

2252 transferred = self.target_butler.transfer_from( 

2253 self.source_butler, 

2254 source_refs, 

2255 id_gen_map=id_gen_map, 

2256 register_dataset_types=True, 

2257 transfer_dimensions=True, 

2258 ) 

2259 self.assertEqual(len(transferred), n_expected) 

2260 log_output = ";".join(cm.output) 

2261 self.assertIn("found in datastore for chunk", log_output) 

2262 self.assertIn("Creating output run", log_output) 

2263 

2264 # Do the transfer twice to ensure that it will do nothing extra. 

2265 # Only do this if purge=True because it does not work for int 

2266 # dataset_id. 

2267 if purge: 

2268 # This should not need to register dataset types. 

2269 transferred = self.target_butler.transfer_from( 

2270 self.source_butler, source_refs, id_gen_map=id_gen_map 

2271 ) 

2272 self.assertEqual(len(transferred), n_expected) 

2273 

2274 # Also do an explicit low-level transfer to trigger some 

2275 # edge cases. 

2276 with self.assertLogs(level=logging.DEBUG) as cm: 

2277 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2278 log_output = ";".join(cm.output) 

2279 self.assertIn("no file artifacts exist", log_output) 

2280 

2281 with self.assertRaises(TypeError): 

2282 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2283 

2284 with self.assertRaises(ValueError): 

2285 self.target_butler.datastore.transfer_from( 

2286 self.source_butler.datastore, source_refs, transfer="split" 

2287 ) 

2288 

2289 # Now try to get the same refs from the new butler. 

2290 for ref in source_refs: 

2291 if ref not in deleted: 

2292 unresolved_ref = ref.unresolved() 

2293 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2294 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2295 self.assertEqual(new_metric, old_metric) 

2296 

2297 # Now prune run2 collection and create instead a CHAINED collection. 

2298 # This should block the transfer. 

2299 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2300 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2301 with self.assertRaises(CollectionTypeError): 

2302 # Re-importing the run1 datasets can be problematic if they 

2303 # use integer IDs so filter those out. 

2304 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2305 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2306 

2307 

2308if __name__ == "__main__": 

2309 unittest.main()