Coverage for tests/test_butler.py: 15%


1035 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import logging
import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls

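# With moto unavailable, the ``mock_s3`` fallback above returns the decorated
# class unchanged, so test classes decorated with ``@mock_s3`` still import
# cleanly; S3-backed test cases are then expected to be skipped elsewhere,
# with ``boto3 is None`` serving as the sentinel for that.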

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))

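# Construct the example dataset stored throughout these tests.  The three
# MetricsExample arguments are assumed to map to its summary, output, and
# data components (the components asserted on in the tests below).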

def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
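        """Check that each named component can be read back both with a
        direct ``butler.get`` of the component dataset type and through a
        deferred handle from ``getDirectDeferred``.
        """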

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
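        """Run a standard suite of put/get/removal tests for the given
        storage class and dataset type name, returning the populated butler
        so callers can make further checks.
        """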

        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})


            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ButlerURI(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts([ref], destination,
                                                           preserve_path=preserve_path, transfer="copy")
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ButlerURI.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                     f" {artifacts} vs {primary_uri} and {secondary_uris}")

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                     preserve_path=preserve_path,
                                                                     overwrite=True)
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run collections
            # are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)


        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have the expected collections
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # A repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed.
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion.
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler


    def testDeferredCollectionPassing(self):
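        """Test a butler that has no default run or collections, so every
        put/get must pass them explicitly.
        """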

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second registration is allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
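        """Test ingesting external files, including a single file that
        carries multiple datasets.
        """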

        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one dataset does not break the second one.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
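        """Test removal of RUN, TAGGED, and CHAINED collections, checking
        registry contents and datastore file existence at each step.
        """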

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge but not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete the RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
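        """Test dataset type queries, including component dataset types, and
        configuration validation against the registered types.
        """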

        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # component dataset types.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
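        """Test that a failure inside a transaction rolls back dimension
        inserts, registry entries, and datastore writes.
        """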

        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if the Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                         "id": 1, "full_name": "det1"})

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                             "id": i, "obs_id": f"exp{i}",
                                                             "seq_num": i, "day_obs": dayobs,
                                                             "physical_filter": "d-r"})

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "detector": 1, "instrument": "DummyCamComp",
                      "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to ``root``).

        Test ``testPutTemplates`` verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
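        """Test that file templates place datasets at the expected paths and
        that a template without unique filenames is rejected.
        """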

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use the default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export datasets to a temporary directory and import them back
        into a new temporary directory repo.  This test does not assume a
        posix datastore.
        """

        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
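        """Test that removeRuns deletes the run collections and, only when
        unstore=True, the underlying files.
        """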

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # There should be nothing in the registry for either one, and the
        # datastore should not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using a variety of transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume a local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

    def testPruneDatasets(self):
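        """Test dataset pruning, including datastore trash behavior when
        datastore records have been removed and trust mode is enabled.
        """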

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put the data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # the trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate an execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal of the ref2 file.
        self.assertFalse(uri2.exists())
        # But ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

1250 def testIngest(self): 

1251 pass 

1252 

1253 

1254class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1255 """PosixDatastore specialization""" 

1256 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1257 fullConfigKey = ".datastore.datastores.1.formatters" 

1258 validationCanFail = True 

1259 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1260 datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1261 "SecondDatastore"] 

1262 registryStr = "/gen3.sqlite3" 
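
    # --- Editorial sketch (illustration only). fullConfigKey above uses
    # the dotted-key access these Config objects support:
    # ".datastore.datastores" is a list of child datastore configs, so
    # the "1" component indexes the second child. The key is only
    # guaranteed to exist in the fully expanded config written by
    # makeRepo; the method name is hypothetical.
    def _sketchChainedConfigKey(self, expandedConfig):
        return expandedConfig[".datastore.datastores.1.formatters"]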

1263 

1264 

1265class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1266 """Test that a yaml file in one location can refer to a root in another.""" 

1267 

1268 datastoreStr = ["dir1"] 

1269 # Disable the makeRepo test since we are deliberately not using 

1270 # butler.yaml as the config name. 

1271 fullConfigKey = None 

1272 

1273 def setUp(self): 

1274 self.root = makeTestTempDir(TESTDIR) 

1275 

1276 # Make a new repository in one place 

1277 self.dir1 = os.path.join(self.root, "dir1") 

1278 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1279 

1280 # Move the yaml file to a different place and add a "root" 

1281 self.dir2 = os.path.join(self.root, "dir2") 

1282 os.makedirs(self.dir2, exist_ok=True) 

1283 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1284 config = Config(configFile1) 

1285 config["root"] = self.dir1 

1286 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1287 config.dumpToUri(configFile2) 

1288 os.remove(configFile1) 

1289 self.tmpConfigFile = configFile2 

1290 

1291 def testFileLocations(self): 

1292 self.assertNotEqual(self.dir1, self.dir2) 

1293 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1294 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1295 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 
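
    # --- Editorial sketch (illustration only). The relocation pattern
    # that setUp builds: a butler config stored anywhere can point back
    # at the data repository through its "root" key. Arguments and the
    # method name are hypothetical.
    def _sketchRootRedirect(self, repoDir, configPath):
        config = Config(os.path.join(repoDir, "butler.yaml"))
        config["root"] = repoDir
        config.dumpToUri(configPath)
        # A butler can now be constructed from the relocated config.
        return Butler(configPath)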

1296 

1297 

1298class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1299 """Test that a config file created by makeRepo outside of repo works.""" 

1300 

1301 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1302 

1303 def setUp(self): 

1304 self.root = makeTestTempDir(TESTDIR) 

1305 self.root2 = makeTestTempDir(TESTDIR) 

1306 

1307 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1308 Butler.makeRepo(self.root, config=Config(self.configFile), 

1309 outfile=self.tmpConfigFile) 

1310 

1311 def tearDown(self): 

1312 if os.path.exists(self.root2): 

1313 shutil.rmtree(self.root2, ignore_errors=True) 

1314 super().tearDown() 

1315 

1316 def testConfigExistence(self): 

1317 c = Config(self.tmpConfigFile) 

1318 uri_config = ButlerURI(c["root"]) 

1319 uri_expected = ButlerURI(self.root, forceDirectory=True) 

1320 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1321 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1322 

1323 def testPutGet(self): 

1324 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1325 self.runPutGetTest(storageClass, "test_metric") 

1326 

1327 

1328class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1329 """Test that a config file created by makeRepo outside of repo works.""" 

1330 

1331 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1332 

1333 def setUp(self): 

1334 self.root = makeTestTempDir(TESTDIR) 

1335 self.root2 = makeTestTempDir(TESTDIR) 

1336 

1337 self.tmpConfigFile = self.root2 

1338 Butler.makeRepo(self.root, config=Config(self.configFile), 

1339 outfile=self.tmpConfigFile) 

1340 

1341 def testConfigExistence(self): 

1342 # Append the yaml file name, else the Config constructor does not

1343 # know the file type.

1344 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1345 super().testConfigExistence() 

1346 

1347 

1348class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1349 """Test that a config file created by makeRepo outside of repo works.""" 

1350 

1351 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1352 

1353 def setUp(self): 

1354 self.root = makeTestTempDir(TESTDIR) 

1355 self.root2 = makeTestTempDir(TESTDIR) 

1356 

1357 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl() 

1358 Butler.makeRepo(self.root, config=Config(self.configFile), 

1359 outfile=self.tmpConfigFile) 

1360 

1361 

1362@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1363@mock_s3 

1364class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1365 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1366 a local SQLite-backed SqlRegistry.

1367 """ 

1368 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1369 fullConfigKey = None 

1370 validationCanFail = True 

1371 

1372 bucketName = "anybucketname" 

1373 """Name of the Bucket that will be used in the tests. The name is read from 

1374 the config file used with the tests during set-up. 

1375 """ 

1376 

1377 root = "butlerRoot/" 

1378 """Root repository directory expected to be used in case useTempRoot=False. 

1379 Otherwise the root is set to a 20 characters long randomly generated string 

1380 during set-up. 

1381 """ 

1382 

1383 datastoreStr = [f"datastore={root}"] 

1384 """Contains all expected root locations in a format expected to be 

1385 returned by Butler stringification. 

1386 """ 

1387 

1388 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1389 """The expected format of the S3 Datastore string.""" 

1390 

1391 registryStr = "/gen3.sqlite3" 

1392 """Expected format of the Registry string.""" 

1393 

1394 def genRoot(self): 

1395 """Returns a random string of len 20 to serve as a root 

1396 name for the temporary bucket repo. 

1397 

1398 This is equivalent to tempfile.mkdtemp as this is what self.root 

1399 becomes when useTempRoot is True. 

1400 """ 

1401 rndstr = "".join( 

1402 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1403 ) 

1404 return rndstr + "/" 

1405 

1406 def setUp(self): 

1407 config = Config(self.configFile) 

1408 uri = ButlerURI(config[".datastore.datastore.root"]) 

1409 self.bucketName = uri.netloc 

1410 

1411 # set up some fake credentials if they do not exist 

1412 self.usingDummyCredentials = setAwsEnvCredentials() 

1413 

1414 if self.useTempRoot: 

1415 self.root = self.genRoot() 

1416 rooturi = f"s3://{self.bucketName}/{self.root}" 

1417 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1418 

1419 # need local folder to store registry database 

1420 self.reg_dir = makeTestTempDir(TESTDIR) 

1421 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1422 

1423 # MOTO needs to know that we expect Bucket bucketname to exist 

1424 # (this used to be the class attribute bucketName) 

1425 s3 = boto3.resource("s3") 

1426 s3.create_bucket(Bucket=self.bucketName) 

1427 

1428 self.datastoreStr = [f"datastore={self.root}"]  # list form, matching the class attribute

1429 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1430 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1431 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 
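
    # --- Editorial sketch (illustration only). The minimal moto contract
    # this setUp relies on: the bucket must be created through boto3
    # inside the mock before any Butler code touches it. Bucket and key
    # names here are illustrative; the method name is hypothetical.
    def _sketchMotoRoundTrip(self):
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket="sketch-bucket")
        s3.Object("sketch-bucket", "key").put(Body=b"payload")
        body = s3.Object("sketch-bucket", "key").get()["Body"].read()
        self.assertEqual(body, b"payload")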

1432 

1433 def tearDown(self): 

1434 s3 = boto3.resource("s3") 

1435 bucket = s3.Bucket(self.bucketName) 

1436 try: 

1437 bucket.objects.all().delete() 

1438 except botocore.exceptions.ClientError as e: 

1439 if e.response["Error"]["Code"] == "404": 

1440 # the key was not reachable - pass 

1441 pass 

1442 else: 

1443 raise 

1444 

1445 bucket = s3.Bucket(self.bucketName) 

1446 bucket.delete() 

1447 

1448 # unset any potentially set dummy credentials 

1449 if self.usingDummyCredentials: 

1450 unsetAwsEnvCredentials() 

1451 

1452 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1453 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1454 

1455 if self.useTempRoot and os.path.exists(self.root): 

1456 shutil.rmtree(self.root, ignore_errors=True) 

1457 

1458 

1459@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1460# Mock required environment variables during tests 

1461@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1462 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1463 TESTDIR, "config/testConfigs/webdav/token"), 

1464 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1465class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1466 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1467 a local SQLite-backed SqlRegistry.

1468 """ 

1469 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1470 fullConfigKey = None 

1471 validationCanFail = True 

1472 

1473 serverName = "localhost" 

1474 """Name of the server that will be used in the tests. 

1475 """ 

1476 

1477 portNumber = 8080 

1478 """Port on which the webdav server listens. Automatically chosen 

1479 at setUpClass via the _getfreeport() method 

1480 """ 

1481 

1482 root = "butlerRoot/" 

1483 """Root repository directory expected to be used in case useTempRoot=False. 

1484 Otherwise the root is set to a 20 characters long randomly generated string 

1485 during set-up. 

1486 """ 

1487 

1488 datastoreStr = [f"datastore={root}"] 

1489 """Contains all expected root locations in a format expected to be 

1490 returned by Butler stringification. 

1491 """ 

1492 

1493 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1494 """The expected format of the WebdavDatastore string.""" 

1495 

1496 registryStr = "/gen3.sqlite3" 

1497 """Expected format of the Registry string.""" 

1498 

1499 serverThread = None 

1500 """Thread in which the local webdav server will run""" 

1501 

1502 stopWebdavServer = False 

1503 """This flag will cause the webdav server to 

1504 gracefully shut down when True 

1505 """ 

1506 

1507 def genRoot(self): 

1508 """Returns a random string of len 20 to serve as a root 

1509 name for the temporary bucket repo. 

1510 

1511 This is equivalent to tempfile.mkdtemp as this is what self.root 

1512 becomes when useTempRoot is True. 

1513 """ 

1514 rndstr = "".join( 

1515 random.choice(string.ascii_uppercase + string.digits) for _ in range(20) 

1516 ) 

1517 return rndstr + "/" 

1518 

1519 @classmethod 

1520 def setUpClass(cls): 

1521 # Do the same as inherited class 

1522 cls.storageClassFactory = StorageClassFactory() 

1523 cls.storageClassFactory.addFromConfig(cls.configFile) 

1524 

1525 cls.portNumber = cls._getfreeport() 

1526 # Run a local webdav server on which tests will be run 

1527 cls.serverThread = Thread(target=cls._serveWebdav, 

1528 args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), 

1529 daemon=True) 

1530 cls.serverThread.start() 

1531 # Wait for it to start 

1532 time.sleep(3) 
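
    # --- Editorial sketch (illustration only). A readiness poll that
    # could replace the fixed sleep above: retry a TCP connect until the
    # server accepts connections or the deadline expires. The method
    # name and timeout are hypothetical.
    @staticmethod
    def _sketchWaitForServer(host, port, timeout=10.0):
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                with socket.create_connection((host, port), timeout=1.0):
                    return
            except OSError:
                time.sleep(0.1)
        raise OSError(f"Server {host}:{port} did not start within {timeout}s")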

1533 

1534 @classmethod 

1535 def tearDownClass(cls): 

1536 # Ask for graceful shut down of the webdav server 

1537 cls.stopWebdavServer = True 

1538 # Wait for the thread to exit 

1539 cls.serverThread.join() 

1540 

1541 # Mock required environment variables during tests 

1542 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1543 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1544 TESTDIR, "config/testConfigs/webdav/token"), 

1545 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1546 def setUp(self): 

1547 config = Config(self.configFile) 

1548 

1549 if self.useTempRoot: 

1550 self.root = self.genRoot() 

1551 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1552 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1553 

1554 # need local folder to store registry database 

1555 self.reg_dir = makeTestTempDir(TESTDIR) 

1556 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1557 

1558 self.datastoreStr = [f"datastore={self.root}"]  # list form, matching the class attribute

1559 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1560 

1561 if not isWebdavEndpoint(self.rooturi): 

1562 raise OSError("Webdav server not running properly: cannot run tests.") 

1563 

1564 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1565 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1566 

1567 # Mock required environment variables during tests 

1568 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1569 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join( 

1570 TESTDIR, "config/testConfigs/webdav/token"), 

1571 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"}) 

1572 def tearDown(self): 

1573 # Clear temporary directory 

1574 ButlerURI(self.rooturi).remove() 

1575 ButlerURI(self.rooturi).session.close() 

1576 

1577 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1578 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1579 

1580 if self.useTempRoot and os.path.exists(self.root): 

1581 shutil.rmtree(self.root, ignore_errors=True) 

1582 

1583 def _serveWebdav(self, port: int, stopWebdavServer): 

1584 """Starts a local webdav-compatible HTTP server, 

1585 Listening on http://localhost:port 

1586 This server only runs when this test class is instantiated, 

1587 and then shuts down. Must be started is a separate thread. 

1588 

1589 Parameters 

1590 ---------- 

1591 port : `int` 

1592 The port number on which the server should listen 

1593 """ 

1594 root_path = gettempdir() 

1595 

1596 config = { 

1597 "host": "0.0.0.0", 

1598 "port": port, 

1599 "provider_mapping": {"/": root_path}, 

1600 "http_authenticator": { 

1601 "domain_controller": None 

1602 }, 

1603 "simple_dc": {"user_mapping": {"*": True}}, 

1604 "verbose": 0, 

1605 } 

1606 app = WsgiDAVApp(config) 

1607 

1608 server_args = { 

1609 "bind_addr": (config["host"], config["port"]), 

1610 "wsgi_app": app, 

1611 } 

1612 server = wsgi.Server(**server_args) 

1613 server.prepare() 

1614 

1615 try: 

1616 # Start the actual server in a separate thread 

1617 t = Thread(target=server.serve, daemon=True) 

1618 t.start() 

1619 # watch stopWebdavServer, and gracefully 

1620 # shut down the server when True 

1621 while True: 

1622 if stopWebdavServer(): 

1623 break 

1624 time.sleep(1) 

1625 except KeyboardInterrupt: 

1626 print("Caught Ctrl-C, shutting down...") 

1627 finally: 

1628 server.stop() 

1629 t.join() 

1630 

@staticmethod
1631 def _getfreeport():

1632 """ 

1633 Determines a free port by binding an ephemeral socket (released before returning).

1634 """ 

1635 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1636 free_socket.bind(('0.0.0.0', 0)) 

1637 free_socket.listen() 

1638 port = free_socket.getsockname()[1] 

1639 free_socket.close() 

1640 return port 

1641 

1642 

1643class PosixDatastoreTransfers(unittest.TestCase): 

1644 """Test data transfers between butlers. 

1645 

1646 Transfers are tested for different dataset-ID managers: UUID to UUID

1647 and integer to integer. UUID to integer is not supported since we do

1648 not currently want to allow that. Integer to UUID is supported, with

1649 the caveat that random UUID4s will be generated, which would be

1650 incorrect for raw dataset types; the test ignores that.

1651 """ 

1652 

1653 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1654 

1655 @classmethod 

1656 def setUpClass(cls): 

1657 cls.storageClassFactory = StorageClassFactory() 

1658 cls.storageClassFactory.addFromConfig(cls.configFile) 

1659 

1660 def setUp(self): 

1661 self.root = makeTestTempDir(TESTDIR) 

1662 self.config = Config(self.configFile) 

1663 

1664 def tearDown(self): 

1665 removeTestTempDir(self.root) 

1666 

1667 def create_butler(self, manager, label): 

1668 config = Config(self.configFile) 

1669 config["registry", "managers", "datasets"] = manager 

1670 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), 

1671 writeable=True) 

1672 

1673 def create_butlers(self, manager1, manager2): 

1674 self.source_butler = self.create_butler(manager1, "1") 

1675 self.target_butler = self.create_butler(manager2, "2") 
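
    # --- Editorial sketch (illustration only). The high-level call shape
    # exercised by the tests below: refs must be resolved DatasetRefs
    # known to the source registry, and register_dataset_types=True lets
    # the target create any missing dataset types (otherwise the transfer
    # raises KeyError, as asserted later). The method name is
    # hypothetical.
    def _sketchTransfer(self, refs):
        return self.target_butler.transfer_from(self.source_butler, refs,
                                                register_dataset_types=True)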

1676 

1677 def testTransferUuidToUuid(self): 

1678 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions." 

1679 "ByDimensionsDatasetRecordStorageManagerUUID", 

1680 "lsst.daf.butler.registry.datasets.byDimensions." 

1681 "ByDimensionsDatasetRecordStorageManagerUUID", 

1682 ) 

1683 # Setting id_gen_map should have no effect here 

1684 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1685 

1686 def testTransferIntToInt(self): 

1687 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions." 

1688 "ByDimensionsDatasetRecordStorageManager", 

1689 "lsst.daf.butler.registry.datasets.byDimensions." 

1690 "ByDimensionsDatasetRecordStorageManager", 

1691 ) 

1692 # int dataset ID only allows UNIQUE 

1693 self.assertButlerTransfers() 

1694 

1695 def testTransferIntToUuid(self): 

1696 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions." 

1697 "ByDimensionsDatasetRecordStorageManager", 

1698 "lsst.daf.butler.registry.datasets.byDimensions." 

1699 "ByDimensionsDatasetRecordStorageManagerUUID", 

1700 ) 

1701 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 
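
    # --- Editorial sketch (illustration only). id_gen_map maps a dataset
    # type name to the DatasetIdGenEnum used when minting IDs in the
    # target: the default UNIQUE makes a fresh random ID, while
    # DATAID_TYPE derives a reproducible ID from (dataset type, data ID),
    # which is why the int-to-UUID test above selects it. The method name
    # is hypothetical.
    def _sketchIdGenMap(self):
        return {"random_data_2": DatasetIdGenEnum.DATAID_TYPE}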

1702 

1703 def testTransferMissing(self): 

1704 """Test transfers where datastore records are missing. 

1705 

1706 This is how execution butler works. 

1707 """ 

1708 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions." 

1709 "ByDimensionsDatasetRecordStorageManagerUUID", 

1710 "lsst.daf.butler.registry.datasets.byDimensions." 

1711 "ByDimensionsDatasetRecordStorageManagerUUID", 

1712 ) 

1713 

1714 # Configure the source butler to allow trust. 

1715 self.source_butler.datastore.trustGetRequest = True 

1716 

1717 self.assertButlerTransfers(purge=True) 

1718 

1719 def testTransferMissingDisassembly(self): 

1720 """Test transfers where datastore records are missing. 

1721 

1722 This is how execution butler works. 

1723 """ 

1724 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions." 

1725 "ByDimensionsDatasetRecordStorageManagerUUID", 

1726 "lsst.daf.butler.registry.datasets.byDimensions." 

1727 "ByDimensionsDatasetRecordStorageManagerUUID", 

1728 ) 

1729 

1730 # Configure the source butler to allow trust. 

1731 self.source_butler.datastore.trustGetRequest = True 

1732 

1733 # Test disassembly. 

1734 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 
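
    # --- Editorial sketch (illustration only). With a composite storage
    # class the datastore may disassemble one dataset into several
    # component artifacts; getURIs() then reports no primary URI and one
    # URI per component, which is why code later guards "if primary".
    # The method name is hypothetical.
    def _sketchComponentUris(self, butler, ref):
        primary, components = butler.datastore.getURIs(ref)
        return primary, components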

1735 

1736 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1737 """Test that a run can be transferred to another butler.""" 

1738 

1739 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1740 datasetTypeName = "random_data" 

1741 

1742 # Test will create 3 collections and we will want to transfer 

1743 # two of those three. 

1744 runs = ["run1", "run2", "other"] 

1745 

1746 # Also want to use two different dataset types to ensure that 

1747 # grouping works. 

1748 datasetTypeNames = ["random_data", "random_data_2"] 

1749 

1750 # Create the run collections in the source butler. 

1751 for run in runs: 

1752 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1753 

1754 # Create dimensions in both butlers (transfer will not create them). 

1755 n_exposures = 30 

1756 for butler in (self.source_butler, self.target_butler): 

1757 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1758 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp", 

1759 "name": "d-r", 

1760 "band": "R"}) 

1761 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", 

1762 "id": 1, "full_name": "det1"}) 

1763 

1764 for i in range(n_exposures): 

1765 butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp", 

1766 "id": i, "obs_id": f"exp{i}", 

1767 "physical_filter": "d-r"}) 

1768 

1769 # Create dataset types in the source butler. 

1770 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

1771 for datasetTypeName in datasetTypeNames: 

1772 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

1773 self.source_butler.registry.registerDatasetType(datasetType) 

1774 

1775 # Write a dataset to an unrelated run -- this will ensure that 

1776 # we are rewriting integer dataset ids in the target if necessary. 

1777 # Will not be relevant for UUID. 

1778 run = "distraction" 

1779 butler = Butler(butler=self.source_butler, run=run) 

1780 butler.put(makeExampleMetrics(), datasetTypeName, 

1781 exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r") 

1782 

1783 # Write some example metrics to the source 

1784 butler = Butler(butler=self.source_butler) 

1785 

1786 # Set of DatasetRefs that should be in the list of refs to transfer 

1787 # but which will not be transferred. 

1788 deleted = set() 

1789 

1790 n_expected = 20 # Number of datasets expected to be transferred 

1791 source_refs = [] 

1792 for i in range(n_exposures): 

1793 # Put a third of the datasets into each collection; only

1794 # two thirds will be retained.

1795 index = i % 3 

1796 run = runs[index] 

1797 datasetTypeName = datasetTypeNames[i % 2] 

1798 

1799 metric_data = {"summary": {"counter": i}, 

1800 "output": {"text": "metric"}, 

1801 "data": [2*x for x in range(i)]} 

1802 metric = MetricsExample(**metric_data) 

1803 dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

1804 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

1805 

1806 # Remove the datastore record using low-level API 

1807 if purge: 

1808 # Remove records for a fraction. 

1809 if index == 1: 

1810 

1811 # For one of these delete the file as well. 

1812 # This allows the "missing" code to filter the 

1813 # file out. 

1814 if not deleted: 

1815 primary, uris = butler.datastore.getURIs(ref) 

1816 if primary: 

1817 primary.remove() 

1818 for uri in uris.values(): 

1819 uri.remove() 

1820 n_expected -= 1 

1821 deleted.add(ref) 

1822 

1823 # Remove the datastore record. 

1824 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

1825 

1826 if index < 2: 

1827 source_refs.append(ref) 

1828 if ref not in deleted: 

1829 new_metric = butler.get(ref.unresolved(), collections=run) 

1830 self.assertEqual(new_metric, metric) 

1831 

1832 # Create some bad dataset types to ensure we check for inconsistent 

1833 # definitions. 

1834 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

1835 for datasetTypeName in datasetTypeNames: 

1836 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

1837 self.target_butler.registry.registerDatasetType(datasetType) 

1838 with self.assertRaises(ConflictingDefinitionError): 

1839 self.target_butler.transfer_from(self.source_butler, source_refs, 

1840 id_gen_map=id_gen_map) 

1841 # And remove the bad definitions. 

1842 for datasetTypeName in datasetTypeNames: 

1843 self.target_butler.registry.removeDatasetType(datasetTypeName) 

1844 

1845 # Transfer without creating dataset types should fail. 

1846 with self.assertRaises(KeyError): 

1847 self.target_butler.transfer_from(self.source_butler, source_refs, 

1848 id_gen_map=id_gen_map) 

1849 

1850 # Now transfer them to the second butler 

1851 with self.assertLogs(level=logging.DEBUG) as cm: 

1852 transferred = self.target_butler.transfer_from(self.source_butler, source_refs, 

1853 id_gen_map=id_gen_map, 

1854 register_dataset_types=True) 

1855 self.assertEqual(len(transferred), n_expected) 

1856 log_output = ";".join(cm.output) 

1857 self.assertIn("found in datastore for chunk", log_output) 

1858 self.assertIn("Creating output run", log_output) 

1859 

1860 # Do the transfer twice to ensure that it will do nothing extra. 

1861 # Only do this if purge=True because it does not work for int 

1862 # dataset_id. 

1863 if purge: 

1864 # This should not need to register dataset types. 

1865 transferred = self.target_butler.transfer_from(self.source_butler, source_refs, 

1866 id_gen_map=id_gen_map) 

1867 self.assertEqual(len(transferred), n_expected) 

1868 

1869 # Also do an explicit low-level transfer to trigger some 

1870 # edge cases. 

1871 with self.assertLogs(level=logging.DEBUG) as cm: 

1872 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

1873 log_output = ";".join(cm.output) 

1874 self.assertIn("no file artifacts exist", log_output) 

1875 

1876 with self.assertRaises(TypeError): 

1877 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

1878 

1879 with self.assertRaises(ValueError): 

1880 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs, 

1881 transfer="split") 

1882 

1883 # Now try to get the same refs from the new butler. 

1884 for ref in source_refs: 

1885 if ref not in deleted: 

1886 unresolved_ref = ref.unresolved() 

1887 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

1888 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

1889 self.assertEqual(new_metric, old_metric) 

1890 

1891 # Now prune the run2 collection and create a CHAINED collection

1892 # in its place. This should block the transfer.

1893 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

1894 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

1895 with self.assertRaises(TypeError): 

1896 # Re-importing the run1 datasets can be problematic if they 

1897 # use integer IDs so filter those out. 

1898 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

1899 self.target_butler.transfer_from(self.source_butler, to_transfer, 

1900 id_gen_map=id_gen_map) 

1901 

1902 

1903 if __name__ == "__main__":

1904 unittest.main()