Coverage for tests/test_butler.py: 13%

1304 statements  

coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # used explicitly below via unittest.mock.patch.dict
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 can not be imported."""
        return None

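# The no-op mock_s3 above only has to keep this module importable; test
# classes that actually need S3 are expected to skip themselves when boto3
# is None (e.g. with @unittest.skipIf(boto3 is None, "moto not available")).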

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

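# For context, a minimal sketch (illustrative, not used directly here) of how
# testing.postgresql provides a throwaway server:
#
#     with testing.postgresql.Postgresql() as server:
#         engine = sqlalchemy.create_engine(server.url())
#
# which is why the module's presence is a reasonable proxy for the server
# being available.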

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    NullDatastore,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, used to prevent the
    misdiagnosis that might otherwise occur when a standard exception is
    raised.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")

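    # For context: searchPaths entries are extra config directories whose
    # values take precedence over the primary butler.yaml, which is why the
    # ("datastore", "records", "table") value differs between config1 and
    # config2 above.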


class ButlerPutGetTests(TestCaseMixin):
    """Helper class providing a suite of put/get tests to run against
    different butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

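    # For reference: componentTypeName composes component dataset type names
    # from the parent name, e.g. componentTypeName("summary") on a
    # "test_metric" dataset type yields "test_metric.summary", which is what
    # the component get() calls above look up.
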
    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

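    # Typical use (illustrative): subclasses call
    #     butler, datasetType = self.create_butler(run, storageClass, name)
    # to get a writeable repo pre-populated with the DummyCamComp instrument,
    # the d-r filter, and visits 423-425.
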
    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to a fresh RUN collection for each put
        # variant below so that a failure in one subTest cannot cascade into
        # the next.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a DatasetRef a second time
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and a subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with the original args should now fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should now fail as well.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know the run
                # collection is now empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using the default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with a standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that a duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # A repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # A repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

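    # runPutGetTest deliberately returns the populated butler so that
    # subclasses can make further assertions against it (see the composite
    # put/get tests in ButlerTests below).
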
    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second registration will be allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new tagged collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of the constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in a run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with an empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create a Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

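    # For context (illustrative): a repository index is a small YAML or JSON
    # mapping of labels to repo URIs, e.g.
    #
    #     label: /path/to/repo/butler.yaml
    #     bad_label: file://bucket/not_real.yaml
    #
    # Butler("label") then resolves the alias through the index named by
    # DAF_BUTLER_REPOSITORY_INDEX, as exercised above.
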
    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with an override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return the native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter on the write storage class should work regardless
        # of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict; this should coerce to a MetricsExample.
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible dataset type definition, this time
        # passing the DatasetType and data ID rather than a DatasetRef. This
        # should be consistent with the DatasetRef behavior and return the
        # python type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

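    # For context: the two tests above exercise StorageClass converters in
    # both directions; put() coerces a compatible in-memory type (here a
    # dict) to the registry definition, while get() can convert the stored
    # object to a requested read storage class.
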
    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For the first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

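    # For reference: FileDataset bundles one file path with one or more
    # DatasetRefs, so a single file can back several datasets when the
    # formatter (here MultiDetectorFormatter) knows how to extract the part
    # belonging to each data ID.
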
    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

1080 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1081 datasetTypeName = "test_metric" 

1082 dimensions = butler.dimensions.extract(["instrument", "visit"]) 

1083 dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = ( 

1084 ("instrument", {"instrument": "DummyCam"}), 

1085 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}), 

1086 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}), 

1087 ) 

1088 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1089 metric = makeExampleMetrics() 

1090 dataId = {"instrument": "DummyCam", "visit": 42} 

1091 # Create and register a DatasetType 

1092 datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry) 

1093 with self.assertRaises(TransactionTestError): 

1094 with butler.transaction(): 

1095 # Add needed Dimensions 

1096 for args in dimensionEntries: 

1097 butler.registry.insertDimensionData(*args) 

1098 # Store a dataset 

1099 ref = butler.put(metric, datasetTypeName, dataId) 

1100 self.assertIsInstance(ref, DatasetRef) 

1101 # Test getDirect 

1102 metricOut = butler.get(ref) 

1103 self.assertEqual(metric, metricOut) 

1104 # Test get 

1105 metricOut = butler.get(datasetTypeName, dataId) 

1106 self.assertEqual(metric, metricOut) 

1107 # Check we can get components 

1108 self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric) 

1109 raise TransactionTestError("This should roll back the entire transaction") 

1110 with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"): 

1111 butler.registry.expandDataId(dataId) 

1112 # Should raise LookupError for missing data ID value 

1113 with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"): 

1114 butler.get(datasetTypeName, dataId) 

1115 # Also check explicitly if Dataset entry is missing 

1116 self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections)) 

1117 # Direct retrieval should not find the file in the Datastore 

1118 with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"): 

1119 butler.get(ref) 

1120 

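    # Note: butler.transaction() covers registry and datastore together, so
    # the rollback above must undo the dimension inserts, the dataset
    # registration, and the stored file as a unit; the assertions that follow
    # check each of those in turn.
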
    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root.
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing a few keys that we know "full"
        # should be inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them.
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId.
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId.
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)

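    # For context: the put() above never names the exposure dimension
    # directly; the registry resolves (instrument, day_obs, seq_num) to the
    # exposure key through the dimension records inserted earlier, which is
    # the "rewriting" being tested.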


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use the default
        # template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); this should not change the template (at least the way
        # we're defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

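    # For context: FileTemplate fields like {visit.name} are filled from the
    # expanded data ID's dimension records; the deliberately misspelled
    # {visit.namex} above is tolerated (with a log message) when marked
    # optional with ":?" and raises KeyError otherwise.
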
1318 def testImportExport(self) -> None: 

1319 # Run put/get tests just to create and populate a repo. 

1320 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1321 self.runImportExportTest(storageClass) 

1322 

1323 @unittest.expectedFailure 

1324 def testImportExportVirtualComposite(self) -> None: 

1325 # Run put/get tests just to create and populate a repo. 

1326 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1327 self.runImportExportTest(storageClass) 

1328 

1329 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1330 """Test exporting and importing. 

1331 

1332 This test does an export to a temp directory and an import back 

1333 into a new temp directory repo. It does not assume a posix datastore. 

1334 """ 

1335 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1336 

1337 # Test that we must have a file extension. 

1338 with self.assertRaises(ValueError): 

1339 with exportButler.export(filename="dump", directory=".") as export: 

1340 pass 

1341 

1342 # Test that unknown format is not allowed. 

1343 with self.assertRaises(ValueError): 

1344 with exportButler.export(filename="dump.fits", directory=".") as export: 

1345 pass 

1346 

1347 # Test that the repo actually has at least one dataset. 

1348 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1349 self.assertGreater(len(datasets), 0) 

1350 # Add a DimensionRecord that's unused by those datasets. 

1351 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1352 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1353 # Export and then import datasets. 

1354 with safeTestTempDir(TESTDIR) as exportDir: 

1355 exportFile = os.path.join(exportDir, "exports.yaml") 

1356 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1357 export.saveDatasets(datasets) 

1358 # Export the same datasets again. This should quietly do 

1359 # nothing because of internal deduplication, and it shouldn't 

1360 # complain about being asked to export the "htm7" elements even 

1361 # though there aren't any in these datasets or in the database. 

1362 export.saveDatasets(datasets, elements=["htm7"]) 

1363 # Save one of the data IDs again; this should be harmless 

1364 # because of internal deduplication. 

1365 export.saveDataIds([datasets[0].dataId]) 

1366 # Save some dimension records directly. 

1367 export.saveDimensionData("skymap", [skymapRecord]) 

1368 self.assertTrue(os.path.exists(exportFile)) 

1369 with safeTestTempDir(TESTDIR) as importDir: 

1370 # We always want this to be a local posix butler 

1371 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1372 # Calling script.butlerImport tests the implementation of the 

1373 # butler command line interface "import" subcommand. Functions 

1374 # in the script folder are generally considered protected and 

1375 # should not be used as public API. 

1376 with open(exportFile) as f: 

1377 script.butlerImport( 

1378 importDir, 

1379 export_file=f, 

1380 directory=exportDir, 

1381 transfer="auto", 

1382 skip_dimensions=None, 

1383 ) 

1384 importButler = Butler(importDir, run=self.default_run) 

1385 for ref in datasets: 

1386 with self.subTest(ref=ref): 

1387 # Test for existence by passing in the DatasetType and 

1388 # data ID separately, to avoid lookup by dataset_id. 

1389 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1390 self.assertEqual( 

1391 list(importButler.registry.queryDimensionRecords("skymap")), 

1392 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1393 ) 
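# A minimal sketch (illustrative, never called by the tests) of the
# public export/import round trip exercised above, assuming ``src`` and
# ``dst`` are writeable butlers and ``refs`` are datasets in ``src``:
def _sketch_export_import(src: Butler, dst: Butler, refs: list[DatasetRef]) -> None:
    with src.export(filename="dump.yaml", directory="/tmp/export", transfer="auto") as export:
        export.saveDatasets(refs)
    dst.import_(filename="dump.yaml", directory="/tmp/export", transfer="auto")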

1394 

1395 def testRemoveRuns(self) -> None: 

1396 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1397 butler = Butler(self.tmpConfigFile, writeable=True) 

1398 # Load registry data with dimensions to hang datasets off of. 

1399 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1400 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1401 # Add some RUN-type collections. 

1402 run1 = "run1" 

1403 butler.registry.registerRun(run1) 

1404 run2 = "run2" 

1405 butler.registry.registerRun(run2) 

1406 # Put a dataset in each run. 

1407 metric = makeExampleMetrics() 

1408 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1409 datasetType = self.addDatasetType( 

1410 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1411 ) 

1412 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1413 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1414 uri1 = butler.getURI(ref1) 

1415 uri2 = butler.getURI(ref2) 

1416 

1417 with self.assertRaises(OrphanedRecordError): 

1418 butler.registry.removeDatasetType(datasetType.name) 

1419 

1420 # Remove from both runs with different values for unstore. 

1421 butler.removeRuns([run1], unstore=True) 

1422 butler.removeRuns([run2], unstore=False) 

1423 # Should be nothing in registry for either one, and datastore should 

1424 # not think either exists. 

1425 with self.assertRaises(MissingCollectionError): 

1426 butler.registry.getCollectionType(run1) 

1427 with self.assertRaises(MissingCollectionError): 

1428 butler.registry.getCollectionType(run2) 

1429 self.assertFalse(butler.stored(ref1)) 

1430 self.assertFalse(butler.stored(ref2)) 

1431 # The ref we unstored should be gone according to the URI, but the 

1432 # one we forgot should still be around. 

1433 self.assertFalse(uri1.exists()) 

1434 self.assertTrue(uri2.exists()) 

1435 

1436 # Now that the collections have been pruned we can remove the 

1437 # dataset type 

1438 butler.registry.removeDatasetType(datasetType.name) 

1439 

1440 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1441 butler.registry.removeDatasetType(("test*", "test*")) 

1442 self.assertIn("not defined", "\n".join(cm.output)) 
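# Condensed restatement (illustrative only) of the removeRuns semantics
# asserted above:
def _sketch_remove_runs(butler: Butler) -> None:
    butler.removeRuns(["run1"], unstore=True)  # Collection gone, artifacts deleted.
    butler.removeRuns(["run2"], unstore=False)  # Collection gone, artifacts left on disk.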

1443 

1444 

1445class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1446 """PosixDatastore specialization of a butler""" 

1447 

1448 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1449 fullConfigKey: str | None = ".datastore.formatters" 

1450 validationCanFail = True 

1451 datastoreStr = ["/tmp"] 

1452 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1453 registryStr = "/gen3.sqlite3" 

1454 

1455 def testPathConstructor(self) -> None: 

1456 """Independent test of constructor using PathLike.""" 

1457 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1458 self.assertIsInstance(butler, Butler) 

1459 

1460 # And again with a Path object with the butler yaml 

1461 path = pathlib.Path(self.tmpConfigFile) 

1462 butler = Butler(path, writeable=False) 

1463 self.assertIsInstance(butler, Butler) 

1464 

1465 # And again with a Path object without the butler yaml 

1466 # (making sure we skip it if the tmp config doesn't end 

1467 # in butler.yaml -- which is the case for a subclass) 

1468 if self.tmpConfigFile.endswith("butler.yaml"): 

1469 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1470 butler = Butler(path, writeable=False) 

1471 self.assertIsInstance(butler, Butler) 

1472 

1473 def testExportTransferCopy(self) -> None: 

1474 """Test local export using all transfer modes""" 

1475 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1476 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1477 # Test that the repo actually has at least one dataset. 

1478 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1479 self.assertGreater(len(datasets), 0) 

1480 uris = [exportButler.getURI(d) for d in datasets] 

1481 assert isinstance(exportButler._datastore, FileDatastore) 

1482 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1483 

1484 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1485 

1486 for path in pathsInStore: 

1487 # Assume local file system 

1488 assert path is not None 

1489 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1490 

1491 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1492 with safeTestTempDir(TESTDIR) as exportDir: 

1493 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1494 export.saveDatasets(datasets) 

1495 for path in pathsInStore: 

1496 assert path is not None 

1497 self.assertTrue( 

1498 self.checkFileExists(exportDir, path), 

1499 f"Check that mode {transfer} exported files", 

1500 ) 

1501 

1502 def testPruneDatasets(self) -> None: 

1503 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1504 butler = Butler(self.tmpConfigFile, writeable=True) 

1505 assert isinstance(butler._datastore, FileDatastore) 

1506 # Load registry data with dimensions to hang datasets off of. 

1507 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1508 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1509 # Add some RUN-type collections. 

1510 run1 = "run1" 

1511 butler.registry.registerRun(run1) 

1512 run2 = "run2" 

1513 butler.registry.registerRun(run2) 

1514 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1515 # different runs. ref3 has a different data ID. 

1516 metric = makeExampleMetrics() 

1517 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1518 datasetType = self.addDatasetType( 

1519 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1520 ) 

1521 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1522 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1523 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1524 

1525 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1526 for ref, stored in many_stored.items(): 

1527 self.assertTrue(stored, f"Ref {ref} should be stored") 

1528 

1529 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1530 for ref, exists in many_exists.items(): 

1531 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1532 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1533 

1534 # Simple prune. 

1535 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1536 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1537 

1538 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1539 for ref, stored in many_stored.items(): 

1540 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1541 

1542 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1543 for ref, exists in many_exists.items(): 

1544 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1545 

1546 # Put data back. 

1547 ref1_new = butler.put(metric, ref1) 

1548 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1549 ref2 = butler.put(metric, ref2) 

1550 

1551 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1552 self.assertTrue(many_stored[ref1]) 

1553 self.assertTrue(many_stored[ref2]) 

1554 self.assertFalse(many_stored[ref3]) 

1555 

1556 ref3 = butler.put(metric, ref3) 

1557 

1558 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1559 for ref, exists in many_exists.items(): 

1560 self.assertTrue(exists, f"Ref {ref} should be stored") 

1561 

1562 # Clear out the datasets from registry and start again. 

1563 refs = [ref1, ref2, ref3] 

1564 butler.pruneDatasets(refs, purge=True, unstore=True) 

1565 for ref in refs: 

1566 butler.put(metric, ref) 

1567 

1568 # Confirm we can retrieve deferred. 

1569 dref1 = butler.getDeferred(ref1) # known and exists 

1570 metric1 = dref1.get() 

1571 self.assertEqual(metric1, metric) 

1572 

1573 # Test different forms of file availability. 

1574 # Need to be in a state where: 

1575 # - one ref just has registry record. 

1576 # - one ref has a missing file but a datastore record. 

1577 # - one ref has a missing datastore record but file is there. 

1578 # - one ref does not exist anywhere. 

1579 # Do not need to test a ref that has everything since that is tested 

1580 # above. 

1581 ref0 = DatasetRef( 

1582 datasetType, 

1583 DataCoordinate.standardize( 

1584 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1585 ), 

1586 run=run1, 

1587 ) 

1588 

1589 # Delete from datastore and retain in Registry. 

1590 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1591 

1592 # File has been removed. 

1593 uri2 = butler.getURI(ref2) 

1594 uri2.remove() 

1595 

1596 # Datastore has lost track. 

1597 butler._datastore.forget([ref3]) 

1598 

1599 # First test with a standard butler. 

1600 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1601 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1602 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1603 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1604 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1605 

1606 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1607 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1608 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1609 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1610 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1611 self.assertTrue(exists_many[ref2]) 
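# For orientation (a sketch of how the DatasetExistence flags are
# believed to compose; grounded in the assertions above):
#   KNOWN    = RECORDED | DATASTORE | _ASSUMED   (artifact not checked)
#   VERIFIED = RECORDED | DATASTORE | _ARTIFACT  (artifact confirmed)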

1612 

1613 # Check that per-ref query gives the same answer as many query. 

1614 for ref, exists in exists_many.items(): 

1615 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1616 

1617 # Get deferred checks for existence before it allows it to be 

1618 # retrieved. 

1619 with self.assertRaises(LookupError): 

1620 butler.getDeferred(ref3) # not known, file exists 

1621 dref2 = butler.getDeferred(ref2) # known but file missing 

1622 with self.assertRaises(FileNotFoundError): 

1623 dref2.get() 

1624 

1625 # Test again with a trusting butler. 

1626 butler._datastore.trustGetRequest = True 

1627 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1628 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1629 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1630 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1631 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1632 

1633 # When trusting we can get a deferred dataset handle that is not 

1634 # known but does exist. 

1635 dref3 = butler.getDeferred(ref3) 

1636 metric3 = dref3.get() 

1637 self.assertEqual(metric3, metric) 

1638 

1639 # Check that per-ref query gives the same answer as many query. 

1640 for ref, exists in exists_many.items(): 

1641 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1642 

1643 # Create a ref that unexpectedly reuses the UUID of an existing ref 

1644 # but is not the same dataset. 

1645 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1646 with self.assertRaises(ValueError): 

1647 butler.exists(ref_bad) 

1648 

1649 # Create a ref that has a compatible storage class. 

1650 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1651 exists = butler.exists(ref_compat) 

1652 self.assertEqual(exists, exists_many[ref2]) 

1653 

1654 # Remove everything and start from scratch. 

1655 butler._datastore.trustGetRequest = False 

1656 butler.pruneDatasets(refs, purge=True, unstore=True) 

1657 for ref in refs: 

1658 butler.put(metric, ref) 

1659 

1660 # These tests mess directly with the trash table and can leave the 

1661 # datastore in an odd state. Do them at the end. 

1662 # Check that in normal mode, deleting the record will lead to 

1663 # trash not touching the file. 

1664 uri1 = butler.getURI(ref1) 

1665 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1666 butler._datastore.forget([ref1]) 

1667 butler._datastore.trash(ref1) 

1668 butler._datastore.emptyTrash() 

1669 self.assertTrue(uri1.exists()) 

1670 uri1.remove() # Clean it up. 

1671 

1672 # Simulate execution butler setup by deleting the datastore 

1673 # record but keeping the file around and trusting. 

1674 butler._datastore.trustGetRequest = True 

1675 uris = butler.get_many_uris([ref2, ref3]) 

1676 uri2 = uris[ref2].primaryURI 

1677 uri3 = uris[ref3].primaryURI 

1678 self.assertTrue(uri2.exists()) 

1679 self.assertTrue(uri3.exists()) 

1680 

1681 # Remove the datastore record. 

1682 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1683 butler._datastore.forget([ref2]) 

1684 self.assertTrue(uri2.exists()) 

1685 butler._datastore.trash([ref2, ref3]) 

1686 # Immediate removal of the ref2 file 

1687 self.assertFalse(uri2.exists()) 

1688 # But ref3 has to wait for the empty. 

1689 self.assertTrue(uri3.exists()) 

1690 butler._datastore.emptyTrash() 

1691 self.assertFalse(uri3.exists()) 
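# Summary of the behaviour exercised above, for orientation: in trust
# mode a trashed ref with no datastore record is deleted immediately,
# since there is no trash-table entry to defer to, while a ref that
# still has a record follows the usual trash -> emptyTrash two-step.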

1692 

1693 # Clear out the datasets from registry. 

1694 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1695 

1696 def testPytypeCoercion(self) -> None: 

1697 """Test python type coercion on Butler.get and put.""" 

1698 # Store some data with the normal example storage class. 

1699 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1700 datasetTypeName = "test_metric" 

1701 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1702 

1703 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1704 metric = butler.get(datasetTypeName, dataId=dataId) 

1705 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1706 

1707 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1708 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1709 

1710 # Now need to hack the registry dataset type definition. 

1711 # There is no API for this. 

1712 assert isinstance(butler._registry, SqlRegistry) 

1713 manager = butler._registry._managers.datasets 

1714 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1715 manager._db.update( 

1716 manager._static.dataset_type, 

1717 {"name": datasetTypeName}, 

1718 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1719 ) 

1720 

1721 # Force reset of dataset type cache 

1722 butler.registry.refresh() 

1723 

1724 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1725 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1726 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1727 

1728 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1729 self.assertNotEqual(type(metric_model), type(metric)) 

1730 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1731 

1732 # Put the model and read it back to show that everything now 

1733 # works as normal. 

1734 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1735 metric_model_new = butler.get(metric_ref) 

1736 self.assertEqual(metric_model_new, metric_model) 
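# A related sketch (illustrative): rather than editing registry rows, a
# compatible python type can be requested per call by overriding the
# ref's storage class, assuming a converter exists between the two
# storage classes' python types:
def _sketch_storage_class_override(butler: Butler, ref: DatasetRef) -> Any:
    return butler.get(ref.overrideStorageClass("StructuredDataNoComponents"))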

1737 

1738 # Hack the storage class again to something that will fail on the 

1739 # get because no conversion is possible. 

1740 manager._db.update( 

1741 manager._static.dataset_type, 

1742 {"name": datasetTypeName}, 

1743 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1744 ) 

1745 butler.registry.refresh() 

1746 

1747 with self.assertRaises(ValueError): 

1748 butler.get(datasetTypeName, dataId=dataId) 

1749 

1750 

1751@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1752class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1753 """PosixDatastore specialization of a butler using Postgres""" 

1754 

1755 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1756 fullConfigKey = ".datastore.formatters" 

1757 validationCanFail = True 

1758 datastoreStr = ["/tmp"] 

1759 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1760 registryStr = "PostgreSQL@test" 

1761 postgresql: Any 

1762 

1763 @staticmethod 

1764 def _handler(postgresql: Any) -> None: 

1765 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1766 with engine.begin() as connection: 

1767 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 
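# (Context, added here as a note: the registry uses GiST exclusion
# constraints on timespans under PostgreSQL, which require the
# btree_gist extension; creating it up front lets schema creation
# succeed.)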

1768 

1769 @classmethod 

1770 def setUpClass(cls) -> None: 

1771 # Create the postgres test server. 

1772 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1773 cache_initialized_db=True, on_initialized=cls._handler 

1774 ) 

1775 super().setUpClass() 

1776 

1777 @classmethod 

1778 def tearDownClass(cls) -> None: 

1779 # Clean up any lingering SQLAlchemy engines/connections 

1780 # so they're closed before we shut down the server. 

1781 gc.collect() 

1782 cls.postgresql.clear_cache() 

1783 super().tearDownClass() 

1784 

1785 def setUp(self) -> None: 

1786 self.server = self.postgresql() 

1787 

1788 # Need to add a registry section to the config. 

1789 self._temp_config = False 

1790 config = Config(self.configFile) 

1791 config["registry", "db"] = self.server.url() 

1792 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1793 config.dump(fh) 

1794 self.configFile = fh.name 

1795 self._temp_config = True 

1796 super().setUp() 

1797 

1798 def tearDown(self) -> None: 

1799 self.server.stop() 

1800 if self._temp_config and os.path.exists(self.configFile): 

1801 os.remove(self.configFile) 

1802 super().tearDown() 

1803 

1804 def testMakeRepo(self) -> None: 

1805 # The base class test assumes that it's using sqlite and that 

1806 # the config file is acceptable to sqlite. 

1807 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1808 

1809 

1810class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1811 """InMemoryDatastore specialization of a butler""" 

1812 

1813 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1814 fullConfigKey = None 

1815 useTempRoot = False 

1816 validationCanFail = False 

1817 datastoreStr = ["datastore='InMemory"] 

1818 datastoreName = ["InMemoryDatastore@"] 

1819 registryStr = "/gen3.sqlite3" 

1820 

1821 def testIngest(self) -> None: 

1822 pass 

1823 

1824 

1825class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1826 """PosixDatastore specialization""" 

1827 

1828 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1829 fullConfigKey = ".datastore.datastores.1.formatters" 

1830 validationCanFail = True 

1831 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1832 datastoreName = [ 

1833 "InMemoryDatastore@", 

1834 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1835 "SecondDatastore", 

1836 ] 

1837 registryStr = "/gen3.sqlite3" 

1838 

1839 

1840class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1841 """Test that a yaml file in one location can refer to a root in another.""" 

1842 

1843 datastoreStr = ["dir1"] 

1844 # Disable the makeRepo test since we are deliberately not using 

1845 # butler.yaml as the config name. 

1846 fullConfigKey = None 

1847 

1848 def setUp(self) -> None: 

1849 self.root = makeTestTempDir(TESTDIR) 

1850 

1851 # Make a new repository in one place 

1852 self.dir1 = os.path.join(self.root, "dir1") 

1853 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1854 

1855 # Move the yaml file to a different place and add a "root" 

1856 self.dir2 = os.path.join(self.root, "dir2") 

1857 os.makedirs(self.dir2, exist_ok=True) 

1858 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1859 config = Config(configFile1) 

1860 config["root"] = self.dir1 

1861 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1862 config.dumpToUri(configFile2) 

1863 os.remove(configFile1) 

1864 self.tmpConfigFile = configFile2 
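# (Illustrative shape of the relocated config: it now carries
# ``root: <dir1>``, so a butler constructed from dir2/butler2.yaml
# resolves its registry and datastore under dir1.)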

1865 

1866 def testFileLocations(self) -> None: 

1867 self.assertNotEqual(self.dir1, self.dir2) 

1868 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1869 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1870 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1871 

1872 

1873class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1874 """Test that a config file created by makeRepo outside of repo works.""" 

1875 

1876 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1877 

1878 def setUp(self) -> None: 

1879 self.root = makeTestTempDir(TESTDIR) 

1880 self.root2 = makeTestTempDir(TESTDIR) 

1881 

1882 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1883 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1884 

1885 def tearDown(self) -> None: 

1886 if os.path.exists(self.root2): 

1887 shutil.rmtree(self.root2, ignore_errors=True) 

1888 super().tearDown() 

1889 

1890 def testConfigExistence(self) -> None: 

1891 c = Config(self.tmpConfigFile) 

1892 uri_config = ResourcePath(c["root"]) 

1893 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1894 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1895 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1896 

1897 def testPutGet(self) -> None: 

1898 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1899 self.runPutGetTest(storageClass, "test_metric") 

1900 

1901 

1902class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1903 """Test that a config file created by makeRepo outside of repo works.""" 

1904 

1905 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1906 

1907 def setUp(self) -> None: 

1908 self.root = makeTestTempDir(TESTDIR) 

1909 self.root2 = makeTestTempDir(TESTDIR) 

1910 

1911 self.tmpConfigFile = self.root2 

1912 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1913 

1914 def testConfigExistence(self) -> None: 

1915 # Append the yaml file else Config constructor does not know the file 

1916 # type. 

1917 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1918 super().testConfigExistence() 

1919 

1920 

1921class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1922 """Test that a config file created by makeRepo outside of repo works.""" 

1923 

1924 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1925 

1926 def setUp(self) -> None: 

1927 self.root = makeTestTempDir(TESTDIR) 

1928 self.root2 = makeTestTempDir(TESTDIR) 

1929 

1930 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1931 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1932 

1933 

1934@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1935class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1936 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1937 a local in-memory SqlRegistry. 

1938 """ 

1939 

1940 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1941 fullConfigKey = None 

1942 validationCanFail = True 

1943 

1944 bucketName = "anybucketname" 

1945 """Name of the Bucket that will be used in the tests. The name is read from 

1946 the config file used with the tests during set-up. 

1947 """ 

1948 

1949 root = "butlerRoot/" 

1950 """Root repository directory expected to be used in case useTempRoot=False. 

1951 Otherwise the root is set to a 20 characters long randomly generated string 

1952 during set-up. 

1953 """ 

1954 

1955 datastoreStr = [f"datastore={root}"] 

1956 """Contains all expected root locations in a format expected to be 

1957 returned by Butler stringification. 

1958 """ 

1959 

1960 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1961 """The expected format of the S3 Datastore string.""" 

1962 

1963 registryStr = "/gen3.sqlite3" 

1964 """Expected format of the Registry string.""" 

1965 

1966 mock_s3 = mock_s3() 

1967 """The mocked s3 interface from moto.""" 

1968 

1969 def genRoot(self) -> str: 

1970 """Return a random string of len 20 to serve as a root 

1971 name for the temporary bucket repo. 

1972 

1973 This is equivalent to tempfile.mkdtemp as this is what self.root 

1974 becomes when useTempRoot is True. 

1975 """ 

1976 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1977 return rndstr + "/" 

1978 

1979 def setUp(self) -> None: 

1980 config = Config(self.configFile) 

1981 uri = ResourcePath(config[".datastore.datastore.root"]) 

1982 self.bucketName = uri.netloc 

1983 

1984 # Enable S3 mocking of tests. 

1985 self.mock_s3.start() 

1986 

1987 # Set up some fake credentials if they do not exist 

1988 self.usingDummyCredentials = setAwsEnvCredentials() 

1989 

1990 if self.useTempRoot: 

1991 self.root = self.genRoot() 

1992 rooturi = f"s3://{self.bucketName}/{self.root}" 

1993 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1994 

1995 # Need a local folder to store the registry database 

1996 self.reg_dir = makeTestTempDir(TESTDIR) 

1997 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1998 

1999 # Moto needs to know that we expect the named bucket to exist 

2000 # (previously this was the fixed class attribute bucketName) 

2001 s3 = boto3.resource("s3") 

2002 s3.create_bucket(Bucket=self.bucketName) 

2003 

2004 self.datastoreStr = [f"datastore='{rooturi}'"] 

2005 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2006 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 
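# (forceConfigRoot=False keeps the explicit S3 root set above instead
# of letting makeRepo reset root-dependent paths to the new repo root.)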

2007 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2008 

2009 def tearDown(self) -> None: 

2010 s3 = boto3.resource("s3") 

2011 bucket = s3.Bucket(self.bucketName) 

2012 try: 

2013 bucket.objects.all().delete() 

2014 except botocore.exceptions.ClientError as e: 

2015 if e.response["Error"]["Code"] == "404": 

2016 # The key was not reachable; nothing to delete. 

2017 pass 

2018 else: 

2019 raise 

2020 

2021 bucket = s3.Bucket(self.bucketName) 

2022 bucket.delete() 

2023 

2024 # Stop the S3 mock. 

2025 self.mock_s3.stop() 

2026 

2027 # Unset any dummy credentials that may have been set 

2028 if self.usingDummyCredentials: 

2029 unsetAwsEnvCredentials() 

2030 

2031 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2032 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2033 

2034 if self.useTempRoot and os.path.exists(self.root): 

2035 shutil.rmtree(self.root, ignore_errors=True) 

2036 

2037 super().tearDown() 

2038 

2039 

2040class PosixDatastoreTransfers(unittest.TestCase): 

2041 """Test data transfers between butlers. 

2042 

2043 Different dataset-ID managers are exercised: UUID to UUID and integer 

2044 to integer. UUID to integer is not supported since we do not currently 

2045 want to allow that. Integer to UUID is supported with the caveat that 

2046 a UUID4 will be generated, which is incorrect for raw dataset types; 

2047 the test ignores that. 

2048 """ 

2049 

2050 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2051 storageClassFactory: StorageClassFactory 

2052 

2053 @classmethod 

2054 def setUpClass(cls) -> None: 

2055 cls.storageClassFactory = StorageClassFactory() 

2056 cls.storageClassFactory.addFromConfig(cls.configFile) 

2057 

2058 def setUp(self) -> None: 

2059 self.root = makeTestTempDir(TESTDIR) 

2060 self.config = Config(self.configFile) 

2061 

2062 def tearDown(self) -> None: 

2063 removeTestTempDir(self.root) 

2064 

2065 def create_butler(self, manager: str, label: str) -> Butler: 

2066 config = Config(self.configFile) 

2067 config["registry", "managers", "datasets"] = manager 

2068 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2069 

2070 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2071 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2072 if manager1 is None: 

2073 manager1 = default 

2074 if manager2 is None: 

2075 manager2 = default 

2076 self.source_butler = self.create_butler(manager1, "1") 

2077 self.target_butler = self.create_butler(manager2, "2") 

2078 

2079 def testTransferUuidToUuid(self) -> None: 

2080 self.create_butlers() 

2081 self.assertButlerTransfers() 

2082 

2083 def _enable_trust(self, datastore: Datastore) -> None: 

2084 datastores = getattr(datastore, "datastores", [datastore]) 

2085 for this_datastore in datastores: 

2086 if hasattr(this_datastore, "trustGetRequest"): 

2087 this_datastore.trustGetRequest = True 

2088 

2089 def testTransferMissing(self) -> None: 

2090 """Test transfers where datastore records are missing. 

2091 

2092 This is how execution butler works. 

2093 """ 

2094 self.create_butlers() 

2095 

2096 # Configure the source butler to allow trust. 

2097 self._enable_trust(self.source_butler._datastore) 

2098 

2099 self.assertButlerTransfers(purge=True) 

2100 

2101 def testTransferMissingDisassembly(self) -> None: 

2102 """Test transfers where datastore records are missing. 

2103 

2104 This is how execution butler works. 

2105 """ 

2106 self.create_butlers() 

2107 

2108 # Configure the source butler to allow trust. 

2109 self._enable_trust(self.source_butler._datastore) 

2110 

2111 # Test disassembly. 

2112 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2113 

2114 def testAbsoluteURITransferDirect(self) -> None: 

2115 """Test transfer using an absolute URI.""" 

2116 self._absolute_transfer("auto") 

2117 

2118 def testAbsoluteURITransferCopy(self) -> None: 

2119 """Test transfer using an absolute URI.""" 

2120 self._absolute_transfer("copy") 

2121 

2122 def _absolute_transfer(self, transfer: str) -> None: 

2123 self.create_butlers() 

2124 

2125 storageClassName = "StructuredData" 

2126 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2127 datasetTypeName = "random_data" 

2128 run = "run1" 

2129 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2130 

2131 dimensions = self.source_butler.dimensions.extract(()) 

2132 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2133 self.source_butler.registry.registerDatasetType(datasetType) 

2134 

2135 metrics = makeExampleMetrics() 

2136 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2137 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

2138 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2139 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2140 dataset = FileDataset(path=temp, refs=source_refs) 

2141 self.source_butler.ingest(dataset, transfer="direct") 

2142 

2143 self.target_butler.transfer_from( 

2144 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2145 ) 

2146 

2147 uri = self.target_butler.getURI(dataset.refs[0]) 

2148 if transfer == "auto": 

2149 self.assertEqual(uri, temp) 

2150 else: 

2151 self.assertNotEqual(uri, temp) 
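# (Mirrors the assertions above: a file ingested with transfer="direct"
# lives outside the datastore root, so transfer="auto" leaves the
# target butler pointing at the original absolute URI, while "copy"
# materializes a new artifact inside the target datastore.)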

2152 

2153 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2154 """Test that a run can be transferred to another butler.""" 

2155 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2156 datasetTypeName = "random_data" 

2157 

2158 # Test will create 3 collections and we will want to transfer 

2159 # two of those three. 

2160 runs = ["run1", "run2", "other"] 

2161 

2162 # Also want to use two different dataset types to ensure that 

2163 # grouping works. 

2164 datasetTypeNames = ["random_data", "random_data_2"] 

2165 

2166 # Create the run collections in the source butler. 

2167 for run in runs: 

2168 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2169 

2170 # Create dimensions in source butler. 

2171 n_exposures = 30 

2172 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2173 self.source_butler.registry.insertDimensionData( 

2174 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2175 ) 

2176 self.source_butler.registry.insertDimensionData( 

2177 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2178 ) 

2179 

2180 for i in range(n_exposures): 

2181 self.source_butler.registry.insertDimensionData( 

2182 "exposure", 

2183 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2184 ) 

2185 

2186 # Create dataset types in the source butler. 

2187 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"]) 

2188 for datasetTypeName in datasetTypeNames: 

2189 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2190 self.source_butler.registry.registerDatasetType(datasetType) 

2191 

2192 # Write a dataset to an unrelated run -- this will ensure that 

2193 # we are rewriting integer dataset IDs in the target if necessary. 

2194 # This is not relevant for UUIDs. 

2195 run = "distraction" 

2196 butler = Butler(butler=self.source_butler, run=run) 

2197 butler.put( 

2198 makeExampleMetrics(), 

2199 datasetTypeName, 

2200 exposure=1, 

2201 instrument="DummyCamComp", 

2202 physical_filter="d-r", 

2203 ) 

2204 

2205 # Write some example metrics to the source 

2206 butler = Butler(butler=self.source_butler) 

2207 

2208 # Set of DatasetRefs that should be in the list of refs to transfer 

2209 # but which will not be transferred. 

2210 deleted: set[DatasetRef] = set() 

2211 

2212 n_expected = 20 # Number of datasets expected to be transferred 

2213 source_refs = [] 

2214 for i in range(n_exposures): 

2215 # Put a third of the datasets into each collection; only two 

2216 # thirds will be retained. 

2217 index = i % 3 

2218 run = runs[index] 

2219 datasetTypeName = datasetTypeNames[i % 2] 

2220 

2221 metric = MetricsExample( 

2222 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2223 ) 

2224 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2225 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2226 

2227 # Remove the datastore record using low-level API, but only 

2228 # for a specific index. 

2229 if purge and index == 1: 

2230 # For one of these delete the file as well. 

2231 # This allows the "missing" code to filter the 

2232 # file out. 

2233 # Access the individual datastores. 

2234 datastores = [] 

2235 if hasattr(butler._datastore, "datastores"): 

2236 datastores.extend(butler._datastore.datastores) 

2237 else: 

2238 datastores.append(butler._datastore) 

2239 

2240 if not deleted: 

2241 # For a chained datastore we need to remove 

2242 # files in each chain. 

2243 for datastore in datastores: 

2244 # The file might not be known to the datastore 

2245 # if constraints are used. 

2246 try: 

2247 primary, uris = datastore.getURIs(ref) 

2248 except FileNotFoundError: 

2249 continue 

2250 if primary and primary.scheme != "mem": 

2251 primary.remove() 

2252 for uri in uris.values(): 

2253 if uri.scheme != "mem": 

2254 uri.remove() 

2255 n_expected -= 1 

2256 deleted.add(ref) 

2257 

2258 # Remove the datastore record. 

2259 for datastore in datastores: 

2260 if hasattr(datastore, "removeStoredItemInfo"): 

2261 datastore.removeStoredItemInfo(ref) 

2262 

2263 if index < 2: 

2264 source_refs.append(ref) 

2265 if ref not in deleted: 

2266 new_metric = butler.get(ref) 

2267 self.assertEqual(new_metric, metric) 

2268 

2269 # Create some bad dataset types to ensure we check for inconsistent 

2270 # definitions. 

2271 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2272 for datasetTypeName in datasetTypeNames: 

2273 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2274 self.target_butler.registry.registerDatasetType(datasetType) 

2275 with self.assertRaises(ConflictingDefinitionError) as cm: 

2276 self.target_butler.transfer_from(self.source_butler, source_refs) 

2277 self.assertIn("dataset type differs", str(cm.exception)) 

2278 

2279 # And remove the bad definitions. 

2280 for datasetTypeName in datasetTypeNames: 

2281 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2282 

2283 # Transfer without creating dataset types should fail. 

2284 with self.assertRaises(KeyError): 

2285 self.target_butler.transfer_from(self.source_butler, source_refs) 

2286 

2287 # Transfer without creating dimensions should fail. 

2288 with self.assertRaises(ConflictingDefinitionError) as cm: 

2289 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2290 self.assertIn("dimension", str(cm.exception)) 

2291 

2292 # The failed transfer above leaves the registry in an inconsistent 

2293 # state because the run is created but then rolled back without 

2294 # the collection cache being cleared. For now, force a refresh; 

2295 # this can be removed with DM-35498. 

2296 self.target_butler.registry.refresh() 

2297 

2298 # Now transfer them to the second butler, including dimensions. 

2299 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2300 transferred = self.target_butler.transfer_from( 

2301 self.source_butler, 

2302 source_refs, 

2303 register_dataset_types=True, 

2304 transfer_dimensions=True, 

2305 ) 

2306 self.assertEqual(len(transferred), n_expected) 

2307 log_output = ";".join(log_cm.output) 

2308 

2309 # A ChainedDatastore will use the in-memory datastore for mexists 

2310 # so we cannot rely on the mexists log message. 

2311 self.assertIn("Number of datastore records found in source", log_output) 

2312 self.assertIn("Creating output run", log_output) 

2313 

2314 # Do the transfer twice to ensure that it will do nothing extra. 

2315 # Only do this if purge=True because it does not work for integer 

2316 # dataset IDs. 

2317 if purge: 

2318 # This should not need to register dataset types. 

2319 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2320 self.assertEqual(len(transferred), n_expected) 

2321 

2322 # Also do an explicit low-level transfer to trigger some 

2323 # edge cases. 

2324 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2325 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2326 log_output = ";".join(log_cm.output) 

2327 self.assertIn("no file artifacts exist", log_output) 

2328 

2329 with self.assertRaises((TypeError, AttributeError)): 

2330 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2331 

2332 with self.assertRaises(ValueError): 

2333 self.target_butler._datastore.transfer_from( 

2334 self.source_butler._datastore, source_refs, transfer="split" 

2335 ) 

2336 

2337 # Now try to get the same refs from the new butler. 

2338 for ref in source_refs: 

2339 if ref not in deleted: 

2340 new_metric = self.target_butler.get(ref) 

2341 old_metric = self.source_butler.get(ref) 

2342 self.assertEqual(new_metric, old_metric) 

2343 

2344 # Now prune run2 collection and create instead a CHAINED collection. 

2345 # This should block the transfer. 

2346 self.target_butler.removeRuns(["run2"], unstore=True) 

2347 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2348 with self.assertRaises(CollectionTypeError): 

2349 # Re-importing the run1 datasets can be problematic if they 

2350 # use integer IDs so filter those out. 

2351 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2352 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2353 

2354 

2355class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2356 """Test transfers using a chained datastore.""" 

2357 

2358 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2359 

2360 

2361class NullDatastoreTestCase(unittest.TestCase): 

2362 """Test that we can fall back to a null datastore.""" 

2363 

2364 # Need a good config to create the repo. 

2365 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2366 storageClassFactory: StorageClassFactory 

2367 

2368 @classmethod 

2369 def setUpClass(cls) -> None: 

2370 cls.storageClassFactory = StorageClassFactory() 

2371 cls.storageClassFactory.addFromConfig(cls.configFile) 

2372 

2373 def setUp(self) -> None: 

2374 """Create a new butler root for each test.""" 

2375 self.root = makeTestTempDir(TESTDIR) 

2376 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2377 

2378 def tearDown(self) -> None: 

2379 removeTestTempDir(self.root) 

2380 

2381 def test_fallback(self) -> None: 

2382 # Read the butler config and mess with the datastore section. 

2383 bad_config = Config(os.path.join(self.root, "butler.yaml")) 

2384 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2385 

2386 with self.assertRaises(RuntimeError): 

2387 Butler(bad_config) 

2388 

2389 butler = Butler(bad_config, writeable=True, without_datastore=True) 

2390 self.assertIsInstance(butler._datastore, NullDatastore) 
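# (Illustrative note: without_datastore=True substitutes a
# NullDatastore, so the registry-only operations below work while any
# attempt to read or write file artifacts raises.)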

2391 

2392 # Check that registry is working. 

2393 butler.registry.registerRun("MYRUN") 

2394 collections = butler.registry.queryCollections(...) 

2395 self.assertIn("MYRUN", set(collections)) 

2396 

2397 # Create a ref. 

2398 dimensions = butler.dimensions.extract([]) 

2399 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2400 datasetTypeName = "metric" 

2401 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2402 butler.registry.registerDatasetType(datasetType) 

2403 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2404 

2405 # Check that datastore will complain. 

2406 with self.assertRaises(FileNotFoundError): 

2407 butler.get(ref) 

2408 with self.assertRaises(FileNotFoundError): 

2409 butler.getURI(ref) 

2410 

2411 

2412def setup_module(module: types.ModuleType) -> None: 

2413 """Set up the module for pytest.""" 

2414 clean_environment() 

2415 

2416 

2417if __name__ == "__main__": 

2418 clean_environment() 

2419 unittest.main()