Coverage for tests/test_butler.py: 14% (1248 statements)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
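
# An illustrative sketch (not part of the original file; the decorated class
# name is hypothetical) of how sentinels like ``boto3 = None`` and
# ``WsgiDAVApp = None`` set above are typically consumed further down in a
# module like this, so that suites depending on an optional package are
# skipped cleanly when it is absent:
#
#     @unittest.skipIf(boto3 is None, "moto mock_s3 not available")
#     class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
#         ...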

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    """Return a simple MetricsExample with summary, output, and data values."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing methods for running a suite of put/get tests
    from different butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # And deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
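
    # For orientation, the repository index file pointed at by
    # DAF_BUTLER_REPOSITORY_INDEX above would look roughly like this on disk
    # (an illustrative sketch derived from the values set in testConstructor,
    # not a fixture shipped with these tests):
    #
    #     label: /path/to/test/root/butler.yaml
    #     bad_label: s3://bucket/not_real.yaml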

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
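        # An illustrative sketch (not an assertion from the original test) of
        # the shadowing just described: a find-first query through the chain
        # resolves the shared data ID to ref1 from run1 rather than ref2, so
        # something like
        #
        #     set(butler.registry.queryDatasets(..., collections=chain1, findFirst=True))
        #
        # would be expected to yield {ref1, ref3} (ref3 also coming via run1).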

        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" lacks a few keys that we know "full" should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes (all will use the
        # default template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
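
    # A note on the template behaviour exercised above (wording ours,
    # behaviour as asserted by the test): fields in a FileTemplate string
    # such as "{visit.name}" are filled from the data ID and its attached
    # dimension records; a field suffixed with ":?" (e.g. "{visit.namex:?}")
    # is optional and is dropped from the formatted path when it cannot be
    # resolved, while an unresolvable mandatory field raises KeyError.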

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

1315 def runImportExportTest(self, storageClass): 

1316 """This test does an export to a temp directory and an import back 

1317 into a new temp directory repo. It does not assume a posix datastore""" 

1318 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1319 print("Root:", exportButler.datastore.root) 

1320 # Test that the repo actually has at least one dataset. 

1321 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1322 self.assertGreater(len(datasets), 0) 

1323 # Add a DimensionRecord that's unused by those datasets. 

1324 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1325 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1326 # Export and then import datasets. 

1327 with safeTestTempDir(TESTDIR) as exportDir: 

1328 exportFile = os.path.join(exportDir, "exports.yaml") 

1329 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1330 export.saveDatasets(datasets) 

1331 # Export the same datasets again. This should quietly do 

1332 # nothing because of internal deduplication, and it shouldn't 

1333 # complain about being asked to export the "htm7" elements even 

1334 # though there aren't any in these datasets or in the database. 

1335 export.saveDatasets(datasets, elements=["htm7"]) 

1336 # Save one of the data IDs again; this should be harmless 

1337 # because of internal deduplication. 

1338 export.saveDataIds([datasets[0].dataId]) 

1339 # Save some dimension records directly. 

1340 export.saveDimensionData("skymap", [skymapRecord]) 

1341 self.assertTrue(os.path.exists(exportFile)) 

1342 with safeTestTempDir(TESTDIR) as importDir: 

1343 # We always want this to be a local POSIX butler.

1344 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1345 # Calling script.butlerImport tests the implementation of the 

1346 # butler command line interface "import" subcommand. Functions 

1347 # in the script folder are generally considered protected and 

1348 # should not be used as public API.

1349 with open(exportFile, "r") as f: 

1350 script.butlerImport( 

1351 importDir, 

1352 export_file=f, 

1353 directory=exportDir, 

1354 transfer="auto", 

1355 skip_dimensions=None, 

1356 reuse_ids=False, 

1357 ) 

1358 importButler = Butler(importDir, run=self.default_run) 

1359 for ref in datasets: 

1360 with self.subTest(ref=ref): 

1361 # Test for existence by passing in the DatasetType and 

1362 # data ID separately, to avoid lookup by dataset_id. 

1363 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

1364 self.assertEqual( 

1365 list(importButler.registry.queryDimensionRecords("skymap")), 

1366 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)], 

1367 ) 

1368 
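# A condensed sketch of the round trip exercised above, assuming `src`
# and `dst` are writeable Butlers and `refs` is a list of DatasetRefs
# present in `src`; file and directory names are illustrative.

with src.export(filename="exports.yaml", directory="export_dir", transfer="auto") as export:
    export.saveDatasets(refs)

# Butler.import_ is the public in-Python counterpart of the `butler
# import` subcommand that script.butlerImport backs.
dst.import_(filename="exports.yaml", directory="export_dir", transfer="auto")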

1369 def testRemoveRuns(self): 

1370 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1371 butler = Butler(self.tmpConfigFile, writeable=True) 

1372 # Load registry data with dimensions to hang datasets off of. 

1373 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1374 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1375 # Add some RUN-type collections.

1376 run1 = "run1" 

1377 butler.registry.registerRun(run1) 

1378 run2 = "run2" 

1379 butler.registry.registerRun(run2) 

1380 # Put a dataset in each run.

1381 metric = makeExampleMetrics() 

1382 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1383 datasetType = self.addDatasetType( 

1384 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1385 ) 

1386 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1387 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1388 uri1 = butler.getURI(ref1, collections=[run1]) 

1389 uri2 = butler.getURI(ref2, collections=[run2]) 

1390 # Remove from both runs with different values for unstore. 

1391 butler.removeRuns([run1], unstore=True) 

1392 butler.removeRuns([run2], unstore=False) 

1393 # Should be nothing in registry for either one, and datastore should 

1394 # not think either exists. 

1395 with self.assertRaises(MissingCollectionError): 

1396 butler.registry.getCollectionType(run1) 

1397 with self.assertRaises(MissingCollectionError): 

1398 butler.registry.getCollectionType(run2) 

1399 self.assertFalse(butler.datastore.exists(ref1)) 

1400 self.assertFalse(butler.datastore.exists(ref2)) 

1401 # The ref we unstored should be gone according to the URI, but the 

1402 # one we forgot should still be around. 

1403 self.assertFalse(uri1.exists()) 

1404 self.assertTrue(uri2.exists()) 

1405 
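# The two calls above differ only in artifact handling. A sketch of the
# rule, assuming `butler` is writeable and both runs exist:

butler.removeRuns(["run1"], unstore=True)   # registry entries and files removed
butler.removeRuns(["run2"], unstore=False)  # datasets forgotten, files left on disk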

1406 

1407class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1408 """PosixDatastore specialization of a butler""" 

1409 

1410 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1411 fullConfigKey = ".datastore.formatters" 

1412 validationCanFail = True 

1413 datastoreStr = ["/tmp"] 

1414 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1415 registryStr = "/gen3.sqlite3" 

1416 

1417 def testPathConstructor(self): 

1418 """Independent test of constructor using PathLike.""" 

1419 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1420 self.assertIsInstance(butler, Butler) 

1421 

1422 # And again with a Path object with the butler yaml 

1423 path = pathlib.Path(self.tmpConfigFile) 

1424 butler = Butler(path, writeable=False) 

1425 self.assertIsInstance(butler, Butler) 

1426 

1427 # And again with a Path object without the butler yaml 

1428 # (making sure we skip it if the tmp config doesn't end 

1429 # in butler.yaml -- which is the case for a subclass) 

1430 if self.tmpConfigFile.endswith("butler.yaml"): 

1431 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1432 butler = Butler(path, writeable=False) 

1433 self.assertIsInstance(butler, Butler) 

1434 

1435 def testExportTransferCopy(self): 

1436 """Test local export using all transfer modes""" 

1437 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1438 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1439 # Test that the repo actually has at least one dataset. 

1440 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1441 self.assertGreater(len(datasets), 0) 

1442 uris = [exportButler.getURI(d) for d in datasets] 

1443 datastoreRoot = exportButler.datastore.root 

1444 

1445 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1446 

1447 for path in pathsInStore: 

1448 # Assume local file system 

1449 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1450 

1451 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1452 with safeTestTempDir(TESTDIR) as exportDir: 

1453 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1454 export.saveDatasets(datasets) 

1455 for path in pathsInStore: 

1456 self.assertTrue( 

1457 self.checkFileExists(exportDir, path), 

1458 f"Check that mode {transfer} exported files", 

1459 ) 

1460 
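# The loop above exercises every local transfer mode: "copy" duplicates
# the artifact, "link" hard-links it, and "symlink"/"relsymlink" create
# absolute and relative symbolic links. A sketch with an illustrative
# target directory, assuming `exportButler` and `datasets` as above:

for transfer in ("copy", "link", "symlink", "relsymlink"):
    with exportButler.export(directory="/tmp/export_demo", format="yaml", transfer=transfer) as export:
        export.saveDatasets(datasets)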

1461 def testPruneDatasets(self): 

1462 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1463 butler = Butler(self.tmpConfigFile, writeable=True) 

1464 # Load registry data with dimensions to hang datasets off of. 

1465 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1466 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1467 # Add some RUN-type collections. 

1468 run1 = "run1" 

1469 butler.registry.registerRun(run1) 

1470 run2 = "run2" 

1471 butler.registry.registerRun(run2) 

1472 # Put some datasets. ref1 and ref2 have the same data ID, and are in

1473 # different runs. ref3 has a different data ID. 

1474 metric = makeExampleMetrics() 

1475 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1476 datasetType = self.addDatasetType( 

1477 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1478 ) 

1479 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1480 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1481 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1482 

1483 # Simple prune. 

1484 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1485 with self.assertRaises(LookupError): 

1486 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1487 

1488 # Put data back. 

1489 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1490 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1491 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1492 

1493 # Check that in normal mode, deleting the record will lead to 

1494 # trash not touching the file. 

1495 uri1 = butler.datastore.getURI(ref1) 

1496 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1497 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1498 butler.datastore.trash(ref1) 

1499 butler.datastore.emptyTrash() 

1500 self.assertTrue(uri1.exists()) 

1501 uri1.remove() # Clean it up. 

1502 

1503 # Simulate execution butler setup by deleting the datastore 

1504 # record but keeping the file around and trusting. 

1505 butler.datastore.trustGetRequest = True 

1506 uri2 = butler.datastore.getURI(ref2) 

1507 uri3 = butler.datastore.getURI(ref3) 

1508 self.assertTrue(uri2.exists()) 

1509 self.assertTrue(uri3.exists()) 

1510 

1511 # Remove the datastore record. 

1512 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1513 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1514 self.assertTrue(uri2.exists()) 

1515 butler.datastore.trash([ref2, ref3]) 

1516 # Immediate removal for ref2 file 

1517 self.assertFalse(uri2.exists()) 

1518 # But ref3 has to wait for emptyTrash.

1519 self.assertTrue(uri3.exists()) 

1520 butler.datastore.emptyTrash() 

1521 self.assertFalse(uri3.exists()) 

1522 

1523 # Clear out the datasets from registry. 

1524 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1525 
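# Datastore file removal is two-phase, as exercised above. A sketch of
# the rule, assuming `ref` is a stored DatasetRef: with trustGetRequest
# enabled and no datastore record, trash() deletes the artifact
# immediately; otherwise deletion is deferred to emptyTrash().

butler.datastore.trash(ref)    # phase 1: move the dataset to the trash table
butler.datastore.emptyTrash()  # phase 2: delete the remaining artifacts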

1526 def testPytypeCoercion(self): 

1527 """Test python type coercion on Butler.get and put.""" 

1528 

1529 # Store some data with the normal example storage class. 

1530 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1531 datasetTypeName = "test_metric" 

1532 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1533 

1534 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1535 metric = butler.get(datasetTypeName, dataId=dataId) 

1536 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1537 

1538 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1539 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1540 

1541 # Now need to hack the registry dataset type definition. 

1542 # There is no API for this. 

1543 manager = butler.registry._managers.datasets 

1544 manager._db.update( 

1545 manager._static.dataset_type, 

1546 {"name": datasetTypeName}, 

1547 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1548 ) 

1549 

1550 # Force reset of dataset type cache 

1551 butler.registry.refresh() 

1552 

1553 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1554 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1555 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1556 

1557 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1558 self.assertNotEqual(type(metric_model), type(metric)) 

1559 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1560 

1561 # Put the model and read it back to show that everything now 

1562 # works as normal. 

1563 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1564 metric_model_new = butler.get(metric_ref) 

1565 self.assertEqual(metric_model_new, metric_model) 

1566 

1567 # Hack the storage class again to something that will fail on the 

1568 # get with no conversion class. 

1569 manager._db.update( 

1570 manager._static.dataset_type, 

1571 {"name": datasetTypeName}, 

1572 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1573 ) 

1574 butler.registry.refresh() 

1575 

1576 with self.assertRaises(ValueError): 

1577 butler.get(datasetTypeName, dataId=dataId) 

1578 
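# A sketch of the behaviour this test pins down, assuming `butler` holds
# test_metric as above: the python type returned by get() follows the
# storage class currently registered for the dataset type, not the type
# that was originally put, and the get fails if no converter exists.

from lsst.utils.introspection import get_full_type_name

obj = butler.get("test_metric", dataId={"instrument": "DummyCamComp", "visit": 423})
print(get_full_type_name(obj))  # tracks the registry's storage class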

1579 

1580@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1581class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1582 """PosixDatastore specialization of a butler using Postgres""" 

1583 

1584 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1585 fullConfigKey = ".datastore.formatters" 

1586 validationCanFail = True 

1587 datastoreStr = ["/tmp"] 

1588 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1589 registryStr = "PostgreSQL@test" 

1590 

1591 @staticmethod 

1592 def _handler(postgresql): 

1593 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1594 with engine.begin() as connection: 

1595 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1596 

1597 @classmethod 

1598 def setUpClass(cls): 

1599 # Create the postgres test server. 

1600 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1601 cache_initialized_db=True, on_initialized=cls._handler 

1602 ) 

1603 super().setUpClass() 

1604 

1605 @classmethod 

1606 def tearDownClass(cls): 

1607 # Clean up any lingering SQLAlchemy engines/connections 

1608 # so they're closed before we shut down the server. 

1609 gc.collect() 

1610 cls.postgresql.clear_cache() 

1611 super().tearDownClass() 

1612 

1613 def setUp(self): 

1614 self.server = self.postgresql() 

1615 

1616 # Need to add a registry section to the config. 

1617 self._temp_config = False 

1618 config = Config(self.configFile) 

1619 config["registry", "db"] = self.server.url() 

1620 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1621 config.dump(fh) 

1622 self.configFile = fh.name 

1623 self._temp_config = True 

1624 super().setUp() 

1625 

1626 def tearDown(self): 

1627 self.server.stop() 

1628 if self._temp_config and os.path.exists(self.configFile): 

1629 os.remove(self.configFile) 

1630 super().tearDown() 

1631 
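# The setUp above follows a general pattern for re-pointing a butler at
# a per-test database: load the config, override the registry URL, and
# dump it to a temporary YAML file. A sketch with an illustrative URL:

import tempfile

from lsst.daf.butler import Config

config = Config("config/basic/butler.yaml")
config["registry", "db"] = "postgresql://localhost:5432/testdb"
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
    config.dump(fh)
    configFile = fh.name  # hand this to Butler or Butler.makeRepo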

1632 def testMakeRepo(self): 

1633 # The base class test assumes that it's using SQLite and that

1634 # the config file is acceptable to SQLite.

1635 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1636 

1637 

1638class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1639 """InMemoryDatastore specialization of a butler""" 

1640 

1641 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1642 fullConfigKey = None 

1643 useTempRoot = False 

1644 validationCanFail = False 

1645 datastoreStr = ["datastore='InMemory"] 

1646 datastoreName = ["InMemoryDatastore@"] 

1647 registryStr = "/gen3.sqlite3" 

1648 

1649 def testIngest(self): 

1650 pass 

1651 

1652 

1653class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1654 """PosixDatastore specialization""" 

1655 

1656 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1657 fullConfigKey = ".datastore.datastores.1.formatters" 

1658 validationCanFail = True 

1659 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1660 datastoreName = [ 

1661 "InMemoryDatastore@", 

1662 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1663 "SecondDatastore", 

1664 ] 

1665 registryStr = "/gen3.sqlite3" 

1666 

1667 

1668class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1669 """Test that a yaml file in one location can refer to a root in another.""" 

1670 

1671 datastoreStr = ["dir1"] 

1672 # Disable the makeRepo test since we are deliberately not using 

1673 # butler.yaml as the config name. 

1674 fullConfigKey = None 

1675 

1676 def setUp(self): 

1677 self.root = makeTestTempDir(TESTDIR) 

1678 

1679 # Make a new repository in one place 

1680 self.dir1 = os.path.join(self.root, "dir1") 

1681 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1682 

1683 # Move the yaml file to a different place and add a "root" 

1684 self.dir2 = os.path.join(self.root, "dir2") 

1685 os.makedirs(self.dir2, exist_ok=True) 

1686 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1687 config = Config(configFile1) 

1688 config["root"] = self.dir1 

1689 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1690 config.dumpToUri(configFile2) 

1691 os.remove(configFile1) 

1692 self.tmpConfigFile = configFile2 

1693 

1694 def testFileLocations(self): 

1695 self.assertNotEqual(self.dir1, self.dir2) 

1696 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1697 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1698 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1699 
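# A sketch of the relocation trick set up above: a config can live apart
# from its repo if it carries an explicit "root" key (paths illustrative):

from lsst.daf.butler import Butler, Config

config = Config("dir1/butler.yaml")
config["root"] = "dir1"
config.dumpToUri("dir2/butler2.yaml")
butler = Butler("dir2/butler2.yaml")  # registry and datastore resolve under dir1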

1700 

1701class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1702 """Test that a config file created by makeRepo outside of repo works.""" 

1703 

1704 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1705 

1706 def setUp(self): 

1707 self.root = makeTestTempDir(TESTDIR) 

1708 self.root2 = makeTestTempDir(TESTDIR) 

1709 

1710 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1711 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1712 

1713 def tearDown(self): 

1714 if os.path.exists(self.root2): 

1715 shutil.rmtree(self.root2, ignore_errors=True) 

1716 super().tearDown() 

1717 

1718 def testConfigExistence(self): 

1719 c = Config(self.tmpConfigFile) 

1720 uri_config = ResourcePath(c["root"]) 

1721 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1722 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1723 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1724 

1725 def testPutGet(self): 

1726 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1727 self.runPutGetTest(storageClass, "test_metric") 

1728 

1729 

1730class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1731 """Test that a config file created by makeRepo outside of repo works.""" 

1732 

1733 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1734 

1735 def setUp(self): 

1736 self.root = makeTestTempDir(TESTDIR) 

1737 self.root2 = makeTestTempDir(TESTDIR) 

1738 

1739 self.tmpConfigFile = self.root2 

1740 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1741 

1742 def testConfigExistence(self): 

1743 # Append the yaml file else Config constructor does not know the file 

1744 # type. 

1745 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1746 super().testConfigExistence() 

1747 

1748 

1749class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1750 """Test that a config file created by makeRepo outside of repo works.""" 

1751 

1752 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1753 

1754 def setUp(self): 

1755 self.root = makeTestTempDir(TESTDIR) 

1756 self.root2 = makeTestTempDir(TESTDIR) 

1757 

1758 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1759 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1760 

1761 

1762@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1763class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1764 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1765 a local in-memory SqlRegistry. 

1766 """ 

1767 

1768 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1769 fullConfigKey = None 

1770 validationCanFail = True 

1771 

1772 bucketName = "anybucketname" 

1773 """Name of the Bucket that will be used in the tests. The name is read from 

1774 the config file used with the tests during set-up. 

1775 """ 

1776 

1777 root = "butlerRoot/" 

1778 """Root repository directory expected to be used in case useTempRoot=False. 

1779 Otherwise the root is set to a randomly generated 20-character string

1780 during set-up. 

1781 """ 

1782 

1783 datastoreStr = [f"datastore={root}"] 

1784 """Contains all expected root locations in a format expected to be 

1785 returned by Butler stringification. 

1786 """ 

1787 

1788 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1789 """The expected format of the S3 Datastore string.""" 

1790 

1791 registryStr = "/gen3.sqlite3" 

1792 """Expected format of the Registry string.""" 

1793 

1794 mock_s3 = mock_s3() 

1795 """The mocked s3 interface from moto.""" 

1796 

1797 def genRoot(self): 

1798 """Returns a random string of len 20 to serve as a root 

1799 name for the temporary bucket repo. 

1800 

1801 This is equivalent to tempfile.mkdtemp as this is what self.root 

1802 becomes when useTempRoot is True. 

1803 """ 

1804 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1805 return rndstr + "/" 

1806 

1807 def setUp(self): 

1808 config = Config(self.configFile) 

1809 uri = ResourcePath(config[".datastore.datastore.root"]) 

1810 self.bucketName = uri.netloc 

1811 

1812 # Enable S3 mocking of tests. 

1813 self.mock_s3.start() 

1814 

1815 # set up some fake credentials if they do not exist 

1816 self.usingDummyCredentials = setAwsEnvCredentials() 

1817 

1818 if self.useTempRoot: 

1819 self.root = self.genRoot() 

1820 rooturi = f"s3://{self.bucketName}/{self.root}" 

1821 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1822 

1823 # need local folder to store registry database 

1824 self.reg_dir = makeTestTempDir(TESTDIR) 

1825 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1826 

1827 # Moto needs to know that we expect the bucket bucketName to exist

1828 # (this used to be the class attribute bucketName).

1829 s3 = boto3.resource("s3") 

1830 s3.create_bucket(Bucket=self.bucketName) 

1831 

1832 self.datastoreStr = f"datastore={self.root}" 

1833 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1834 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1835 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1836 

1837 def tearDown(self): 

1838 s3 = boto3.resource("s3") 

1839 bucket = s3.Bucket(self.bucketName) 

1840 try: 

1841 bucket.objects.all().delete() 

1842 except botocore.exceptions.ClientError as e: 

1843 if e.response["Error"]["Code"] == "404": 

1844 # the key was not reachable - pass 

1845 pass 

1846 else: 

1847 raise 

1848 

1849 bucket = s3.Bucket(self.bucketName) 

1850 bucket.delete() 

1851 

1852 # Stop the S3 mock. 

1853 self.mock_s3.stop() 

1854 

1855 # unset any potentially set dummy credentials 

1856 if self.usingDummyCredentials: 

1857 unsetAwsEnvCredentials() 

1858 

1859 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1860 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1861 

1862 if self.useTempRoot and os.path.exists(self.root): 

1863 shutil.rmtree(self.root, ignore_errors=True) 

1864 

1865 super().tearDown() 

1866 
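# The moto pattern used by this class also works as a plain class
# decorator, independent of the butler machinery; the bucket name below
# is illustrative. Dummy credentials let botocore sign the mocked calls.

import unittest

import boto3
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from moto import mock_s3


@mock_s3
class S3SmokeTest(unittest.TestCase):
    def setUp(self):
        self.usingDummyCredentials = setAwsEnvCredentials()

    def tearDown(self):
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def test_bucket(self):
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket="anybucketname")
        self.assertEqual([b.name for b in s3.buckets.all()], ["anybucketname"])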

1867 

1868@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1869class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1870 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1871 a local in-memory SqlRegistry. 

1872 """ 

1873 

1874 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1875 fullConfigKey = None 

1876 validationCanFail = True 

1877 

1878 serverName = "localhost" 

1879 """Name of the server that will be used in the tests. 

1880 """ 

1881 

1882 portNumber = 8080 

1883 """Port on which the webdav server listens. Automatically chosen 

1884 at setUpClass via the _getfreeport() method.

1885 """ 

1886 

1887 root = "butlerRoot/" 

1888 """Root repository directory expected to be used in case useTempRoot=False. 

1889 Otherwise the root is set to a randomly generated 20-character string

1890 during set-up. 

1891 """ 

1892 

1893 datastoreStr = [f"datastore={root}"] 

1894 """Contains all expected root locations in a format expected to be 

1895 returned by Butler stringification. 

1896 """ 

1897 

1898 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1899 """The expected format of the WebdavDatastore string.""" 

1900 

1901 registryStr = "/gen3.sqlite3" 

1902 """Expected format of the Registry string.""" 

1903 

1904 serverThread = None 

1905 """Thread in which the local webdav server will run""" 

1906 

1907 stopWebdavServer = False 

1908 """This flag will cause the webdav server to 

1909 gracefully shut down when True 

1910 """ 

1911 

1912 def genRoot(self): 

1913 """Returns a random string of len 20 to serve as a root 

1914 name for the temporary bucket repo. 

1915 

1916 This is equivalent to tempfile.mkdtemp as this is what self.root 

1917 becomes when useTempRoot is True. 

1918 """ 

1919 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1920 return rndstr + "/" 

1921 

1922 @classmethod 

1923 def setUpClass(cls): 

1924 # Do the same as inherited class 

1925 cls.storageClassFactory = StorageClassFactory() 

1926 cls.storageClassFactory.addFromConfig(cls.configFile) 

1927 

1928 cls.portNumber = cls._getfreeport() 

1929 # Run a local webdav server on which tests will be run 

1930 cls.serverThread = Thread( 

1931 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1932 ) 

1933 cls.serverThread.start() 

1934 # Wait for it to start 

1935 time.sleep(3) 

1936 

1937 @classmethod 

1938 def tearDownClass(cls): 

1939 # Ask for graceful shut down of the webdav server 

1940 cls.stopWebdavServer = True 

1941 # Wait for the thread to exit 

1942 cls.serverThread.join() 

1943 super().tearDownClass() 

1944 

1945 def setUp(self): 

1946 config = Config(self.configFile) 

1947 

1948 if self.useTempRoot: 

1949 self.root = self.genRoot() 

1950 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1951 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1952 

1953 # need local folder to store registry database 

1954 self.reg_dir = makeTestTempDir(TESTDIR) 

1955 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1956 

1957 self.datastoreStr = f"datastore={self.root}" 

1958 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1959 

1960 if not _is_webdav_endpoint(self.rooturi): 

1961 raise OSError("Webdav server not running properly: cannot run tests.") 

1962 

1963 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1964 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1965 

1966 def tearDown(self): 

1967 # Clear temporary directory 

1968 ResourcePath(self.rooturi).remove() 

1969 ResourcePath(self.rooturi).session.close() 

1970 

1971 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1972 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1973 

1974 if self.useTempRoot and os.path.exists(self.root): 

1975 shutil.rmtree(self.root, ignore_errors=True) 

1976 

1977 super().tearDown() 

1978 

1979 def _serveWebdav(self, port: int, stopWebdavServer): 

1980 """Starts a local webdav-compatible HTTP server, 

1981 Listening on http://localhost:port 

1982 This server only runs when this test class is instantiated, 

1983 and then shuts down. Must be started is a separate thread. 

1984 

1985 Parameters 

1986 ---------- 

1987 port : `int` 

1988 The port number on which the server should listen 

1989 """ 

1990 root_path = gettempdir() 

1991 

1992 config = { 

1993 "host": "0.0.0.0", 

1994 "port": port, 

1995 "provider_mapping": {"/": root_path}, 

1996 "http_authenticator": {"domain_controller": None}, 

1997 "simple_dc": {"user_mapping": {"*": True}}, 

1998 "verbose": 0, 

1999 } 

2000 app = WsgiDAVApp(config) 

2001 

2002 server_args = { 

2003 "bind_addr": (config["host"], config["port"]), 

2004 "wsgi_app": app, 

2005 } 

2006 server = wsgi.Server(**server_args) 

2007 server.prepare() 

2008 

2009 try: 

2010 # Start the actual server in a separate thread 

2011 t = Thread(target=server.serve, daemon=True) 

2012 t.start() 

2013 # watch stopWebdavServer, and gracefully 

2014 # shut down the server when True 

2015 while True: 

2016 if stopWebdavServer(): 

2017 break 

2018 time.sleep(1) 

2019 except KeyboardInterrupt: 

2020 print("Caught Ctrl-C, shutting down...") 

2021 finally: 

2022 server.stop() 

2023 t.join() 

2024 

2025 def _getfreeport(): 

2026 """ 

2027 Determines a free port using sockets. 

2028 """ 

2029 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

2030 free_socket.bind(("127.0.0.1", 0)) 

2031 free_socket.listen() 

2032 port = free_socket.getsockname()[1] 

2033 free_socket.close() 

2034 return port 

2035 
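# A caveat on the helper above: the port is only guaranteed free at the
# instant the probe socket closes, so another process can grab it before
# the server binds. Binding to port 0 and keeping the socket open avoids
# the race entirely; a sketch:

import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(("127.0.0.1", 0))
sock.listen()
port = sock.getsockname()[1]  # pass `sock` itself to the server rather than closing it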

2036 

2037class PosixDatastoreTransfers(unittest.TestCase): 

2038 """Test data transfers between butlers. 

2039 

2040 Tests run for different dataset-ID managers: UUID to UUID and

2041 integer to integer are tested. UUID to integer is not supported

2042 since we do not currently want to allow that. Integer to UUID is

2043 supported with the caveat that UUID4 IDs will be generated, which

2044 will be incorrect for raw dataset types. The tests ignore that.

2045 """ 

2046 

2047 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2048 

2049 @classmethod 

2050 def setUpClass(cls): 

2051 cls.storageClassFactory = StorageClassFactory() 

2052 cls.storageClassFactory.addFromConfig(cls.configFile) 

2053 

2054 def setUp(self): 

2055 self.root = makeTestTempDir(TESTDIR) 

2056 self.config = Config(self.configFile) 

2057 

2058 def tearDown(self): 

2059 removeTestTempDir(self.root) 

2060 

2061 def create_butler(self, manager, label): 

2062 config = Config(self.configFile) 

2063 config["registry", "managers", "datasets"] = manager 

2064 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2065 

2066 def create_butlers(self, manager1, manager2): 

2067 self.source_butler = self.create_butler(manager1, "1") 

2068 self.target_butler = self.create_butler(manager2, "2") 

2069 
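# A sketch of the operation these tests exercise, assuming `source` and
# `target` are Butlers with compatible dataset-ID managers and `refs`
# are resolved DatasetRefs known to the source registry:

transferred = target.transfer_from(
    source,
    refs,
    register_dataset_types=True,  # create any missing dataset types in target
    transfer_dimensions=True,     # copy the required dimension records too
)
assert len(transferred) == len(refs)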

2070 def testTransferUuidToUuid(self): 

2071 self.create_butlers( 

2072 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2073 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2074 ) 

2075 # Setting id_gen_map should have no effect here 

2076 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2077 

2078 def testTransferIntToInt(self): 

2079 with self.assertWarns(FutureWarning): 

2080 self.create_butlers( 

2081 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2082 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2083 ) 

2084 # int dataset ID only allows UNIQUE 

2085 self.assertButlerTransfers() 

2086 

2087 def testTransferIntToUuid(self): 

2088 with self.assertWarns(FutureWarning): 

2089 self.create_butlers( 

2090 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2091 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2092 ) 

2093 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2094 

2095 def testTransferMissing(self): 

2096 """Test transfers where datastore records are missing. 

2097 

2098 This is how execution butler works. 

2099 """ 

2100 self.create_butlers( 

2101 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2102 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2103 ) 

2104 

2105 # Configure the source butler to allow trust. 

2106 self.source_butler.datastore.trustGetRequest = True 

2107 

2108 self.assertButlerTransfers(purge=True) 

2109 

2110 def testTransferMissingDisassembly(self): 

2111 """Test transfers where datastore records are missing. 

2112 

2113 This is how execution butler works. 

2114 """ 

2115 self.create_butlers( 

2116 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2117 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2118 ) 

2119 

2120 # Configure the source butler to allow trust. 

2121 self.source_butler.datastore.trustGetRequest = True 

2122 

2123 # Test disassembly. 

2124 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2125 

2126 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2127 """Test that a run can be transferred to another butler.""" 

2128 

2129 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2130 datasetTypeName = "random_data" 

2131 

2132 # The test will create three collections, of which we will want to

2133 # transfer two.

2134 runs = ["run1", "run2", "other"] 

2135 

2136 # Also want to use two different dataset types to ensure that 

2137 # grouping works. 

2138 datasetTypeNames = ["random_data", "random_data_2"] 

2139 

2140 # Create the run collections in the source butler. 

2141 for run in runs: 

2142 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2143 

2144 # Create dimensions in source butler. 

2145 n_exposures = 30 

2146 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2147 self.source_butler.registry.insertDimensionData( 

2148 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2149 ) 

2150 self.source_butler.registry.insertDimensionData( 

2151 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2152 ) 

2153 

2154 for i in range(n_exposures): 

2155 self.source_butler.registry.insertDimensionData( 

2156 "exposure", 

2157 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2158 ) 

2159 

2160 # Create dataset types in the source butler. 

2161 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

2162 for datasetTypeName in datasetTypeNames: 

2163 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2164 self.source_butler.registry.registerDatasetType(datasetType) 

2165 

2166 # Write a dataset to an unrelated run -- this will ensure that 

2167 # we are rewriting integer dataset ids in the target if necessary. 

2168 # Will not be relevant for UUID. 

2169 run = "distraction" 

2170 butler = Butler(butler=self.source_butler, run=run) 

2171 butler.put( 

2172 makeExampleMetrics(), 

2173 datasetTypeName, 

2174 exposure=1, 

2175 instrument="DummyCamComp", 

2176 physical_filter="d-r", 

2177 ) 

2178 

2179 # Write some example metrics to the source 

2180 butler = Butler(butler=self.source_butler) 

2181 

2182 # Set of DatasetRefs that should be in the list of refs to transfer 

2183 # but which will not be transferred. 

2184 deleted = set() 

2185 

2186 n_expected = 20 # Number of datasets expected to be transferred 

2187 source_refs = [] 

2188 for i in range(n_exposures): 

2189 # Put a third of the datasets into each collection; only retain

2190 # two thirds.

2191 index = i % 3 

2192 run = runs[index] 

2193 datasetTypeName = datasetTypeNames[i % 2] 

2194 

2195 metric_data = { 

2196 "summary": {"counter": i}, 

2197 "output": {"text": "metric"}, 

2198 "data": [2 * x for x in range(i)], 

2199 } 

2200 metric = MetricsExample(**metric_data) 

2201 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2202 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2203 

2204 # Remove the datastore record using low-level API 

2205 if purge: 

2206 # Remove records for a fraction. 

2207 if index == 1: 

2208 

2209 # For one of these delete the file as well. 

2210 # This allows the "missing" code to filter the 

2211 # file out. 

2212 if not deleted: 

2213 primary, uris = butler.datastore.getURIs(ref) 

2214 if primary: 

2215 primary.remove() 

2216 for uri in uris.values(): 

2217 uri.remove() 

2218 n_expected -= 1 

2219 deleted.add(ref) 

2220 

2221 # Remove the datastore record. 

2222 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2223 

2224 if index < 2: 

2225 source_refs.append(ref) 

2226 if ref not in deleted: 

2227 new_metric = butler.get(ref.unresolved(), collections=run) 

2228 self.assertEqual(new_metric, metric) 

2229 

2230 # Create some bad dataset types to ensure we check for inconsistent 

2231 # definitions. 

2232 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2233 for datasetTypeName in datasetTypeNames: 

2234 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2235 self.target_butler.registry.registerDatasetType(datasetType) 

2236 with self.assertRaises(ConflictingDefinitionError) as cm: 

2237 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2238 self.assertIn("dataset type differs", str(cm.exception)) 

2239 

2240 # And remove the bad definitions. 

2241 for datasetTypeName in datasetTypeNames: 

2242 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2243 

2244 # Transfer without creating dataset types should fail. 

2245 with self.assertRaises(KeyError): 

2246 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2247 

2248 # Transfer without creating dimensions should fail. 

2249 with self.assertRaises(ConflictingDefinitionError) as cm: 

2250 self.target_butler.transfer_from( 

2251 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2252 ) 

2253 self.assertIn("dimension", str(cm.exception)) 

2254 

2255 # The failed transfer above leaves the registry in an inconsistent

2256 # state because the run is created but then rolled back without 

2257 # the collection cache being cleared. For now force a refresh. 

2258 # Can remove with DM-35498. 

2259 self.target_butler.registry.refresh() 

2260 

2261 # Now transfer them to the second butler, including dimensions. 

2262 with self.assertLogs(level=logging.DEBUG) as cm: 

2263 transferred = self.target_butler.transfer_from( 

2264 self.source_butler, 

2265 source_refs, 

2266 id_gen_map=id_gen_map, 

2267 register_dataset_types=True, 

2268 transfer_dimensions=True, 

2269 ) 

2270 self.assertEqual(len(transferred), n_expected) 

2271 log_output = ";".join(cm.output) 

2272 self.assertIn("found in datastore for chunk", log_output) 

2273 self.assertIn("Creating output run", log_output) 

2274 

2275 # Do the transfer twice to ensure that it will do nothing extra. 

2276 # Only do this if purge=True because it does not work for int 

2277 # dataset_id. 

2278 if purge: 

2279 # This should not need to register dataset types. 

2280 transferred = self.target_butler.transfer_from( 

2281 self.source_butler, source_refs, id_gen_map=id_gen_map 

2282 ) 

2283 self.assertEqual(len(transferred), n_expected) 

2284 

2285 # Also do an explicit low-level transfer to trigger some 

2286 # edge cases. 

2287 with self.assertLogs(level=logging.DEBUG) as cm: 

2288 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2289 log_output = ";".join(cm.output) 

2290 self.assertIn("no file artifacts exist", log_output) 

2291 

2292 with self.assertRaises(TypeError): 

2293 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2294 

2295 with self.assertRaises(ValueError): 

2296 self.target_butler.datastore.transfer_from( 

2297 self.source_butler.datastore, source_refs, transfer="split" 

2298 ) 

2299 

2300 # Now try to get the same refs from the new butler. 

2301 for ref in source_refs: 

2302 if ref not in deleted: 

2303 unresolved_ref = ref.unresolved() 

2304 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2305 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2306 self.assertEqual(new_metric, old_metric) 

2307 

2308 # Now prune the run2 collection and create a CHAINED collection instead.

2309 # This should block the transfer. 

2310 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2311 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2312 with self.assertRaises(CollectionTypeError): 

2313 # Re-importing the run1 datasets can be problematic if they 

2314 # use integer IDs, so filter those out.

2315 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2316 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2317 

2318 

2319if __name__ == "__main__": 2319 ↛ 2320, line 2319 didn't jump to line 2320, because the condition on line 2319 was never true

2320 unittest.main()