Coverage for tests/test_datastore.py: 11%

1074 statements  

coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NullDatastore,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
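        """Test that setConfigRoot rewrites the configured root keys to
        point at a new location.
        """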

        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
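        """Test that a datastore can be constructed and reports the
        expected ephemeral status.
        """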

        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
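        """Test a round trip of put, exists, get, and URI retrieval for
        several storage classes, including component access.
        """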

        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        # Preserve the last storage class from the loop for reuse below.
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so that only the file remains
            datastore.removeStoredItemInfo(ref)

            # While still not trusting, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
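        """Create a datastore and store ``n_refs`` example datasets in it.

        Returns a flat tuple of the datastore followed by the dataset refs,
        suitable for unpacking as ``datastore, *refs = self.prepDeleteTest()``.
        """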

        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self) -> None:
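        """Test that a removed dataset no longer exists and that a second
        removal raises.
        """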

        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
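        """Test that forgetting a dataset removes the datastore's knowledge
        of it without deleting the underlying artifact.
        """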

        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
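        """Test transferring a dataset from one datastore to another."""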

        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
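        """Test that datasets put inside a failed transaction are rolled
        back while those from a successful transaction persist.
        """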

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
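        """Test that an error propagating out of nested transactions rolls
        back both levels.
        """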

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        """Create example metrics and a dataset ref for the ingest tests."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # First copy it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingesting a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
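        """Create a datastore and put several example datasets into it,
        returning the datastore and the list of refs.
        """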

        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, including a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
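        """Test exporting datasets, including unsupported transfer modes
        and refs unknown to the datastore.
        """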

        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
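        """Check that the same object can be put with two variant storage
        classes and reads back equal from both.
        """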

        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
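        """Test that every datastore name has a corresponding root and that
        non-None roots exist.
        """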

        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self) -> None:
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        This test can only run with FileDatastore since that is the only
        datastore supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )

        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=True) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)

            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # UUIDv5 refs can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
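        """Test that trashed datasets are only deleted when the trash is
        emptied, including behaviour in trust mode.
        """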

        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write cleans up any partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try formatters that fail, one leaving a partial file behind
        # and one not
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose the temp file matching the StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose the temp file matching the StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # An ephemeral datastore currently means InMemory,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

1443 

class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create a list of refs and a list of matching test files.
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

1495 

1496 def tearDown(self) -> None: 

1497 if self.root is not None and os.path.exists(self.root): 

1498 shutil.rmtree(self.root, ignore_errors=True) 

1499 

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
  """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory.
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary.
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use the default caching status and set metric1 to False."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
  """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
  """

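        # Precedence exercised below: DAF_BUTLER_CACHE_DIRECTORY always wins;
        # otherwise an explicit ``root`` in the config is used; and
        # DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET only applies when the config
        # does not set a root itself.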

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now a config with no explicit cache root.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
  """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

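        # Note: set_fallback_cache_directory_if_unset() returns a
        # ``(defined, cache_dir)`` tuple; ``defined`` is True in the checks
        # below because the variable is patched to an empty value, so the
        # call itself has to pick the fallback directory that later cache
        # managers will use.
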
        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
  """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs, strict=True):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
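        # For example, mode="files" with threshold=2 renders as:
        #   cached:
        #     default: true
        #     expiry:
        #       mode: files
        #       threshold: 2
        #     cacheable:
        #       unused: true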
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
  """

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

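        # While a file is held open via find_in_cache it should be protected
        # from expiry: per the comments below, the file is hardlinked into an
        # "exempt" directory for the lifetime of the context manager even if
        # expiry removes it from the main cache.
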
        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add an additional entry to the cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self) -> None:
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""
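        # For example, as in the "files" test above, inserting 5 datasets
        # with n_retained = threshold + 1 = 3 means only refs[2..4] should
        # still be findable in the cache afterwards.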
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the null datastore."""

    storageClassFactory = StorageClassFactory()

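    # NullDatastore stores nothing: presence checks return False, read
    # operations raise FileNotFoundError, and mutating or bulk APIs raise
    # NotImplementedError, as asserted below.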
    def test_basics(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        null = NullDatastore(None, None)

        self.assertFalse(null.exists(ref))
        self.assertFalse(null.knows(ref))
        knows = null.knows_these([ref])
        self.assertFalse(knows[ref])
        null.validateConfiguration(ref)

        with self.assertRaises(FileNotFoundError):
            null.get(ref)
        with self.assertRaises(NotImplementedError):
            null.put("", ref)
        with self.assertRaises(FileNotFoundError):
            null.getURI(ref)
        with self.assertRaises(FileNotFoundError):
            null.getURIs(ref)
        with self.assertRaises(FileNotFoundError):
            null.getManyURIs([ref])
        with self.assertRaises(NotImplementedError):
            null.getLookupKeys()
        with self.assertRaises(NotImplementedError):
            null.import_records({})
        with self.assertRaises(NotImplementedError):
            null.export_records([])
        with self.assertRaises(NotImplementedError):
            null.export([ref])
        with self.assertRaises(NotImplementedError):
            null.transfer(null, ref)
        with self.assertRaises(NotImplementedError):
            null.emptyTrash()
        with self.assertRaises(NotImplementedError):
            null.trash(ref)
        with self.assertRaises(NotImplementedError):
            null.forget([ref])
        with self.assertRaises(NotImplementedError):
            null.remove(ref)
        with self.assertRaises(NotImplementedError):
            null.retrieveArtifacts([ref], ResourcePath("."))
        with self.assertRaises(NotImplementedError):
            null.transfer_from(null, [ref])
        with self.assertRaises(NotImplementedError):
            null.ingest()


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
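        # DatasetRefURIs is what Datastore.getURIs returns; callers commonly
        # unpack it as ``primary, components = uris``, so it needs to behave
        # like a two-item tuple.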
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # affirm that DatasetRefURIs does not support MutableSequence
        # functions
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # but DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the StoredFileInfo class."""

    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

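        # A representative serialized datastore record; from_record and
        # to_record are expected to round-trip it exactly, as asserted below.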
        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)


if __name__ == "__main__":
    unittest.main()