Coverage for tests/test_datastore.py: 17%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

791 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import os 

23import shutil 

24import tempfile 

25import time 

26import unittest 

27from dataclasses import dataclass 

28 

29import lsst.utils.tests 

30import yaml 

31from lsst.daf.butler import ( 

32 Config, 

33 DatasetTypeNotSupportedError, 

34 DatastoreCacheManager, 

35 DatastoreCacheManagerConfig, 

36 DatastoreConfig, 

37 DatastoreDisabledCacheManager, 

38 DatastoreValidationError, 

39 DimensionUniverse, 

40 FileDataset, 

41 NamedKeyDict, 

42 StorageClass, 

43 StorageClassFactory, 

44) 

45from lsst.daf.butler.formatters.yaml import YamlFormatter 

46from lsst.daf.butler.tests import ( 

47 BadNoWriteFormatter, 

48 BadWriteFormatter, 

49 DatasetTestHelper, 

50 DatastoreTestHelper, 

51 DummyRegistry, 

52 MetricsExample, 

53) 

54from lsst.resources import ResourcePath 

55from lsst.utils import doImport 

56 

57TESTDIR = os.path.dirname(__file__) 

58 

59 

60def makeExampleMetrics(use_none=False): 

61 if use_none: 

62 array = None 

63 else: 

64 array = [563, 234, 456.7, 105, 2054, -1045] 

65 return MetricsExample( 

66 {"AM1": 5.2, "AM2": 30.6}, 

67 {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}, 

68 array, 

69 ) 

70 

71 

72@dataclass(frozen=True) 

73class Named: 

74 name: str 

75 

76 

77class FakeDataCoordinate(NamedKeyDict): 

78 """A fake hashable frozen DataCoordinate built from a simple dict.""" 

79 

80 @classmethod 

81 def from_dict(cls, dataId): 

82 new = cls() 

83 for k, v in dataId.items(): 

84 new[Named(k)] = v 

85 return new.freeze() 

86 

87 def __hash__(self) -> int: 

88 return hash(frozenset(self.items())) 

89 

90 

91class TransactionTestError(Exception): 

92 """Specific error for transactions, to prevent misdiagnosing 

93 that might otherwise occur when a standard exception is used. 

94 """ 

95 

96 pass 

97 

98 

99class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper): 

100 """Support routines for datastore testing""" 

101 

102 root = None 

103 

104 @classmethod 

105 def setUpClass(cls): 

106 # Storage Classes are fixed for all datastores in these tests 

107 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

108 cls.storageClassFactory = StorageClassFactory() 

109 cls.storageClassFactory.addFromConfig(scConfigFile) 

110 

111 # Read the Datastore config so we can get the class 

112 # information (since we should not assume the constructor 

113 # name here, but rely on the configuration file itself) 

114 datastoreConfig = DatastoreConfig(cls.configFile) 

115 cls.datastoreType = doImport(datastoreConfig["cls"]) 

116 cls.universe = DimensionUniverse() 

117 

118 def setUp(self): 

119 self.setUpDatastoreTests(DummyRegistry, DatastoreConfig) 

120 

121 def tearDown(self): 

122 if self.root is not None and os.path.exists(self.root): 

123 shutil.rmtree(self.root, ignore_errors=True) 

124 

125 

126class DatastoreTests(DatastoreTestsBase): 

127 """Some basic tests of a simple datastore.""" 

128 

129 hasUnsupportedPut = True 

130 

131 def testConfigRoot(self): 

132 full = DatastoreConfig(self.configFile) 

133 config = DatastoreConfig(self.configFile, mergeDefaults=False) 

134 newroot = "/random/location" 

135 self.datastoreType.setConfigRoot(newroot, config, full) 

136 if self.rootKeys: 

137 for k in self.rootKeys: 

138 self.assertIn(newroot, config[k]) 

139 

140 def testConstructor(self): 

141 datastore = self.makeDatastore() 

142 self.assertIsNotNone(datastore) 

143 self.assertIs(datastore.isEphemeral, self.isEphemeral) 

144 

145 def testConfigurationValidation(self): 

146 datastore = self.makeDatastore() 

147 sc = self.storageClassFactory.getStorageClass("ThingOne") 

148 datastore.validateConfiguration([sc]) 

149 

150 sc2 = self.storageClassFactory.getStorageClass("ThingTwo") 

151 if self.validationCanFail: 

152 with self.assertRaises(DatastoreValidationError): 

153 datastore.validateConfiguration([sc2], logFailures=True) 

154 

155 dimensions = self.universe.extract(("visit", "physical_filter")) 

156 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} 

157 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

158 datastore.validateConfiguration([ref]) 

159 

160 def testParameterValidation(self): 

161 """Check that parameters are validated""" 

162 sc = self.storageClassFactory.getStorageClass("ThingOne") 

163 dimensions = self.universe.extract(("visit", "physical_filter")) 

164 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} 

165 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

166 datastore = self.makeDatastore() 

167 data = {1: 2, 3: 4} 

168 datastore.put(data, ref) 

169 newdata = datastore.get(ref) 

170 self.assertEqual(data, newdata) 

171 with self.assertRaises(KeyError): 

172 newdata = datastore.get(ref, parameters={"missing": 5}) 

173 

174 def testBasicPutGet(self): 

175 metrics = makeExampleMetrics() 

176 datastore = self.makeDatastore() 

177 

178 # Create multiple storage classes for testing different formulations 

179 storageClasses = [ 

180 self.storageClassFactory.getStorageClass(sc) 

181 for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle") 

182 ] 

183 

184 dimensions = self.universe.extract(("visit", "physical_filter")) 

185 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} 

186 

187 for sc in storageClasses: 

188 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

189 print("Using storageClass: {}".format(sc.name)) 

190 datastore.put(metrics, ref) 

191 

192 # Does it exist? 

193 self.assertTrue(datastore.exists(ref)) 

194 

195 # Get 

196 metricsOut = datastore.get(ref, parameters=None) 

197 self.assertEqual(metrics, metricsOut) 

198 

199 uri = datastore.getURI(ref) 

200 self.assertEqual(uri.scheme, self.uriScheme) 

201 

202 # Get a component -- we need to construct new refs for them 

203 # with derived storage classes but with parent ID 

204 for comp in ("data", "output"): 

205 compRef = ref.makeComponentRef(comp) 

206 output = datastore.get(compRef) 

207 self.assertEqual(output, getattr(metricsOut, comp)) 

208 

209 uri = datastore.getURI(compRef) 

210 self.assertEqual(uri.scheme, self.uriScheme) 

211 

212 storageClass = sc 

213 

214 # Check that we can put a metric with None in a component and 

215 # get it back as None 

216 metricsNone = makeExampleMetrics(use_none=True) 

217 dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"} 

218 refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False) 

219 datastore.put(metricsNone, refNone) 

220 

221 comp = "data" 

222 for comp in ("data", "output"): 

223 compRef = refNone.makeComponentRef(comp) 

224 output = datastore.get(compRef) 

225 self.assertEqual(output, getattr(metricsNone, comp)) 

226 

227 # Check that a put fails if the dataset type is not supported 

228 if self.hasUnsupportedPut: 

229 sc = StorageClass("UnsupportedSC", pytype=type(metrics)) 

230 ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId) 

231 with self.assertRaises(DatasetTypeNotSupportedError): 

232 datastore.put(metrics, ref) 

233 

234 # These should raise 

235 ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000) 

236 with self.assertRaises(FileNotFoundError): 

237 # non-existing file 

238 datastore.get(ref) 

239 

240 # Get a URI from it 

241 uri = datastore.getURI(ref, predict=True) 

242 self.assertEqual(uri.scheme, self.uriScheme) 

243 

244 with self.assertRaises(FileNotFoundError): 

245 datastore.getURI(ref) 

246 

247 def testTrustGetRequest(self): 

248 """Check that we can get datasets that registry knows nothing about.""" 

249 

250 datastore = self.makeDatastore() 

251 

252 # Skip test if the attribute is not defined 

253 if not hasattr(datastore, "trustGetRequest"): 

254 return 

255 

256 metrics = makeExampleMetrics() 

257 

258 i = 0 

259 for sc_name in ("StructuredData", "StructuredComposite"): 

260 i += 1 

261 datasetTypeName = f"metric{i}" 

262 

263 if sc_name == "StructuredComposite": 

264 disassembled = True 

265 else: 

266 disassembled = False 

267 

268 # Start datastore in default configuration of using registry 

269 datastore.trustGetRequest = False 

270 

271 # Create multiple storage classes for testing with or without 

272 # disassembly 

273 sc = self.storageClassFactory.getStorageClass(sc_name) 

274 dimensions = self.universe.extract(("visit", "physical_filter")) 

275 dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"} 

276 

277 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False) 

278 datastore.put(metrics, ref) 

279 

280 # Does it exist? 

281 self.assertTrue(datastore.exists(ref)) 

282 

283 # Get 

284 metricsOut = datastore.get(ref) 

285 self.assertEqual(metrics, metricsOut) 

286 

287 # Get the URI(s) 

288 primaryURI, componentURIs = datastore.getURIs(ref) 

289 if disassembled: 

290 self.assertIsNone(primaryURI) 

291 self.assertEqual(len(componentURIs), 3) 

292 else: 

293 self.assertIn(datasetTypeName, primaryURI.path) 

294 self.assertFalse(componentURIs) 

295 

296 # Delete registry entry so now we are trusting 

297 datastore.removeStoredItemInfo(ref) 

298 

299 # Now stop trusting and check that things break 

300 datastore.trustGetRequest = False 

301 

302 # Does it exist? 

303 self.assertFalse(datastore.exists(ref)) 

304 

305 with self.assertRaises(FileNotFoundError): 

306 datastore.get(ref) 

307 

308 with self.assertRaises(FileNotFoundError): 

309 datastore.get(ref.makeComponentRef("data")) 

310 

311 # URI should fail unless we ask for prediction 

312 with self.assertRaises(FileNotFoundError): 

313 datastore.getURIs(ref) 

314 

315 predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True) 

316 if disassembled: 

317 self.assertIsNone(predicted_primary) 

318 self.assertEqual(len(predicted_disassembled), 3) 

319 for uri in predicted_disassembled.values(): 

320 self.assertEqual(uri.fragment, "predicted") 

321 self.assertIn(datasetTypeName, uri.path) 

322 else: 

323 self.assertIn(datasetTypeName, predicted_primary.path) 

324 self.assertFalse(predicted_disassembled) 

325 self.assertEqual(predicted_primary.fragment, "predicted") 

326 

327 # Now enable registry-free trusting mode 

328 datastore.trustGetRequest = True 

329 

330 # Try again to get it 

331 metricsOut = datastore.get(ref) 

332 self.assertEqual(metricsOut, metrics) 

333 

334 # Does it exist? 

335 self.assertTrue(datastore.exists(ref)) 

336 

337 # Get a component 

338 comp = "data" 

339 compRef = ref.makeComponentRef(comp) 

340 output = datastore.get(compRef) 

341 self.assertEqual(output, getattr(metrics, comp)) 

342 

343 # Get the URI -- if we trust this should work even without 

344 # enabling prediction. 

345 primaryURI2, componentURIs2 = datastore.getURIs(ref) 

346 self.assertEqual(primaryURI2, primaryURI) 

347 self.assertEqual(componentURIs2, componentURIs) 

348 

349 def testDisassembly(self): 

350 """Test disassembly within datastore.""" 

351 metrics = makeExampleMetrics() 

352 if self.isEphemeral: 

353 # in-memory datastore does not disassemble 

354 return 

355 

356 # Create multiple storage classes for testing different formulations 

357 # of composites. One of these will not disassemble to provide 

358 # a reference. 

359 storageClasses = [ 

360 self.storageClassFactory.getStorageClass(sc) 

361 for sc in ( 

362 "StructuredComposite", 

363 "StructuredCompositeTestA", 

364 "StructuredCompositeTestB", 

365 "StructuredCompositeReadComp", 

366 "StructuredData", # No disassembly 

367 "StructuredCompositeReadCompNoDisassembly", 

368 ) 

369 ] 

370 

371 # Create the test datastore 

372 datastore = self.makeDatastore() 

373 

374 # Dummy dataId 

375 dimensions = self.universe.extract(("visit", "physical_filter")) 

376 dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"} 

377 

378 for i, sc in enumerate(storageClasses): 

379 with self.subTest(storageClass=sc.name): 

380 # Create a different dataset type each time round 

381 # so that a test failure in this subtest does not trigger 

382 # a cascade of tests because of file clashes 

383 ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False) 

384 

385 disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"} 

386 

387 datastore.put(metrics, ref) 

388 

389 baseURI, compURIs = datastore.getURIs(ref) 

390 if disassembled: 

391 self.assertIsNone(baseURI) 

392 self.assertEqual(set(compURIs), {"data", "output", "summary"}) 

393 else: 

394 self.assertIsNotNone(baseURI) 

395 self.assertEqual(compURIs, {}) 

396 

397 metrics_get = datastore.get(ref) 

398 self.assertEqual(metrics_get, metrics) 

399 

400 # Retrieve the composite with read parameter 

401 stop = 4 

402 metrics_get = datastore.get(ref, parameters={"slice": slice(stop)}) 

403 self.assertEqual(metrics_get.summary, metrics.summary) 

404 self.assertEqual(metrics_get.output, metrics.output) 

405 self.assertEqual(metrics_get.data, metrics.data[:stop]) 

406 

407 # Retrieve a component 

408 data = datastore.get(ref.makeComponentRef("data")) 

409 self.assertEqual(data, metrics.data) 

410 

411 # On supported storage classes attempt to access a read 

412 # only component 

413 if "ReadComp" in sc.name: 

414 cRef = ref.makeComponentRef("counter") 

415 counter = datastore.get(cRef) 

416 self.assertEqual(counter, len(metrics.data)) 

417 

418 counter = datastore.get(cRef, parameters={"slice": slice(stop)}) 

419 self.assertEqual(counter, stop) 

420 

421 datastore.remove(ref) 

422 

423 def testRegistryCompositePutGet(self): 

424 """Tests the case where registry disassembles and puts to datastore.""" 

425 metrics = makeExampleMetrics() 

426 datastore = self.makeDatastore() 

427 

428 # Create multiple storage classes for testing different formulations 

429 # of composites 

430 storageClasses = [ 

431 self.storageClassFactory.getStorageClass(sc) 

432 for sc in ( 

433 "StructuredComposite", 

434 "StructuredCompositeTestA", 

435 "StructuredCompositeTestB", 

436 ) 

437 ] 

438 

439 dimensions = self.universe.extract(("visit", "physical_filter")) 

440 dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"} 

441 

442 for sc in storageClasses: 

443 print("Using storageClass: {}".format(sc.name)) 

444 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

445 

446 components = sc.delegate().disassemble(metrics) 

447 self.assertTrue(components) 

448 

449 compsRead = {} 

450 for compName, compInfo in components.items(): 

451 compRef = self.makeDatasetRef( 

452 ref.datasetType.componentTypeName(compName), 

453 dimensions, 

454 components[compName].storageClass, 

455 dataId, 

456 conform=False, 

457 ) 

458 

459 print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name)) 

460 datastore.put(compInfo.component, compRef) 

461 

462 uri = datastore.getURI(compRef) 

463 self.assertEqual(uri.scheme, self.uriScheme) 

464 

465 compsRead[compName] = datastore.get(compRef) 

466 

467 # We can generate identical files for each storage class 

468 # so remove the component here 

469 datastore.remove(compRef) 

470 

471 # combine all the components we read back into a new composite 

472 metricsOut = sc.delegate().assemble(compsRead) 

473 self.assertEqual(metrics, metricsOut) 

474 

475 def prepDeleteTest(self, n_refs=1): 

476 metrics = makeExampleMetrics() 

477 datastore = self.makeDatastore() 

478 # Put 

479 dimensions = self.universe.extract(("visit", "physical_filter")) 

480 sc = self.storageClassFactory.getStorageClass("StructuredData") 

481 refs = [] 

482 for i in range(n_refs): 

483 dataId = FakeDataCoordinate.from_dict( 

484 {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"} 

485 ) 

486 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

487 datastore.put(metrics, ref) 

488 

489 # Does it exist? 

490 self.assertTrue(datastore.exists(ref)) 

491 

492 # Get 

493 metricsOut = datastore.get(ref) 

494 self.assertEqual(metrics, metricsOut) 

495 refs.append(ref) 

496 

497 return datastore, *refs 

498 

499 def testRemove(self): 

500 datastore, ref = self.prepDeleteTest() 

501 

502 # Remove 

503 datastore.remove(ref) 

504 

505 # Does it exist? 

506 self.assertFalse(datastore.exists(ref)) 

507 

508 # Do we now get a predicted URI? 

509 uri = datastore.getURI(ref, predict=True) 

510 self.assertEqual(uri.fragment, "predicted") 

511 

512 # Get should now fail 

513 with self.assertRaises(FileNotFoundError): 

514 datastore.get(ref) 

515 # Can only delete once 

516 with self.assertRaises(FileNotFoundError): 

517 datastore.remove(ref) 

518 

519 def testForget(self): 

520 datastore, ref = self.prepDeleteTest() 

521 

522 # Remove 

523 datastore.forget([ref]) 

524 

525 # Does it exist (as far as we know)? 

526 self.assertFalse(datastore.exists(ref)) 

527 

528 # Do we now get a predicted URI? 

529 uri = datastore.getURI(ref, predict=True) 

530 self.assertEqual(uri.fragment, "predicted") 

531 

532 # Get should now fail 

533 with self.assertRaises(FileNotFoundError): 

534 datastore.get(ref) 

535 

536 # Forgetting again is a silent no-op 

537 datastore.forget([ref]) 

538 

539 # Predicted URI should still point to the file. 

540 self.assertTrue(uri.exists()) 

541 

542 def testTransfer(self): 

543 metrics = makeExampleMetrics() 

544 

545 dimensions = self.universe.extract(("visit", "physical_filter")) 

546 dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"} 

547 

548 sc = self.storageClassFactory.getStorageClass("StructuredData") 

549 ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False) 

550 

551 inputDatastore = self.makeDatastore("test_input_datastore") 

552 outputDatastore = self.makeDatastore("test_output_datastore") 

553 

554 inputDatastore.put(metrics, ref) 

555 outputDatastore.transfer(inputDatastore, ref) 

556 

557 metricsOut = outputDatastore.get(ref) 

558 self.assertEqual(metrics, metricsOut) 

559 

560 def testBasicTransaction(self): 

561 datastore = self.makeDatastore() 

562 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

563 dimensions = self.universe.extract(("visit", "physical_filter")) 

564 nDatasets = 6 

565 dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)] 

566 data = [ 

567 ( 

568 self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False), 

569 makeExampleMetrics(), 

570 ) 

571 for dataId in dataIds 

572 ] 

573 succeed = data[: nDatasets // 2] 

574 fail = data[nDatasets // 2 :] 

575 # All datasets added in this transaction should continue to exist 

576 with datastore.transaction(): 

577 for ref, metrics in succeed: 

578 datastore.put(metrics, ref) 

579 # Whereas datasets added in this transaction should not 

580 with self.assertRaises(TransactionTestError): 

581 with datastore.transaction(): 

582 for ref, metrics in fail: 

583 datastore.put(metrics, ref) 

584 raise TransactionTestError("This should propagate out of the context manager") 

585 # Check for datasets that should exist 

586 for ref, metrics in succeed: 

587 # Does it exist? 

588 self.assertTrue(datastore.exists(ref)) 

589 # Get 

590 metricsOut = datastore.get(ref, parameters=None) 

591 self.assertEqual(metrics, metricsOut) 

592 # URI 

593 uri = datastore.getURI(ref) 

594 self.assertEqual(uri.scheme, self.uriScheme) 

595 # Check for datasets that should not exist 

596 for ref, _ in fail: 

597 # These should raise 

598 with self.assertRaises(FileNotFoundError): 

599 # non-existing file 

600 datastore.get(ref) 

601 with self.assertRaises(FileNotFoundError): 

602 datastore.getURI(ref) 

603 

604 def testNestedTransaction(self): 

605 datastore = self.makeDatastore() 

606 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

607 dimensions = self.universe.extract(("visit", "physical_filter")) 

608 metrics = makeExampleMetrics() 

609 

610 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"} 

611 refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

612 datastore.put(metrics, refBefore) 

613 with self.assertRaises(TransactionTestError): 

614 with datastore.transaction(): 

615 dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"} 

616 refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

617 datastore.put(metrics, refOuter) 

618 with datastore.transaction(): 

619 dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"} 

620 refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

621 datastore.put(metrics, refInner) 

622 # All datasets should exist 

623 for ref in (refBefore, refOuter, refInner): 

624 metricsOut = datastore.get(ref, parameters=None) 

625 self.assertEqual(metrics, metricsOut) 

626 raise TransactionTestError("This should roll back the transaction") 

627 # Dataset(s) inserted before the transaction should still exist 

628 metricsOut = datastore.get(refBefore, parameters=None) 

629 self.assertEqual(metrics, metricsOut) 

630 # But all datasets inserted during the (rolled back) transaction 

631 # should be gone 

632 with self.assertRaises(FileNotFoundError): 

633 datastore.get(refOuter) 

634 with self.assertRaises(FileNotFoundError): 

635 datastore.get(refInner) 

636 

637 def _prepareIngestTest(self): 

638 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

639 dimensions = self.universe.extract(("visit", "physical_filter")) 

640 metrics = makeExampleMetrics() 

641 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"} 

642 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

643 return metrics, ref 

644 

645 def runIngestTest(self, func, expectOutput=True): 

646 metrics, ref = self._prepareIngestTest() 

647 # The file will be deleted after the test. 

648 # For symlink tests this leads to a situation where the datastore 

649 # points to a file that does not exist. This will make os.path.exist 

650 # return False but then the new symlink will fail with 

651 # FileExistsError later in the code so the test still passes. 

652 with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path: 

653 with open(path, "w") as fd: 

654 yaml.dump(metrics._asdict(), stream=fd) 

655 func(metrics, path, ref) 

656 

657 def testIngestNoTransfer(self): 

658 """Test ingesting existing files with no transfer.""" 

659 for mode in (None, "auto"): 

660 

661 # Some datastores have auto but can't do in place transfer 

662 if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto: 

663 continue 

664 

665 with self.subTest(mode=mode): 

666 datastore = self.makeDatastore() 

667 

668 def succeed(obj, path, ref): 

669 """Ingest a file already in the datastore root.""" 

670 # first move it into the root, and adjust the path 

671 # accordingly 

672 path = shutil.copy(path, datastore.root.ospath) 

673 path = os.path.relpath(path, start=datastore.root.ospath) 

674 datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode) 

675 self.assertEqual(obj, datastore.get(ref)) 

676 

677 def failInputDoesNotExist(obj, path, ref): 

678 """Can't ingest files if we're given a bad path.""" 

679 with self.assertRaises(FileNotFoundError): 

680 datastore.ingest( 

681 FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode 

682 ) 

683 self.assertFalse(datastore.exists(ref)) 

684 

685 def failOutsideRoot(obj, path, ref): 

686 """Can't ingest files outside of datastore root unless 

687 auto.""" 

688 if mode == "auto": 

689 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

690 self.assertTrue(datastore.exists(ref)) 

691 else: 

692 with self.assertRaises(RuntimeError): 

693 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

694 self.assertFalse(datastore.exists(ref)) 

695 

696 def failNotImplemented(obj, path, ref): 

697 with self.assertRaises(NotImplementedError): 

698 datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode) 

699 

700 if mode in self.ingestTransferModes: 

701 self.runIngestTest(failOutsideRoot) 

702 self.runIngestTest(failInputDoesNotExist) 

703 self.runIngestTest(succeed) 

704 else: 

705 self.runIngestTest(failNotImplemented) 

706 

707 def testIngestTransfer(self): 

708 """Test ingesting existing files after transferring them.""" 

709 for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"): 

710 with self.subTest(mode=mode): 

711 datastore = self.makeDatastore(mode) 

712 

713 def succeed(obj, path, ref): 

714 """Ingest a file by transferring it to the template 

715 location.""" 

716 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

717 self.assertEqual(obj, datastore.get(ref)) 

718 

719 def failInputDoesNotExist(obj, path, ref): 

720 """Can't ingest files if we're given a bad path.""" 

721 with self.assertRaises(FileNotFoundError): 

722 # Ensure the file does not look like it is in 

723 # datastore for auto mode 

724 datastore.ingest( 

725 FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode 

726 ) 

727 self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}") 

728 

729 def failNotImplemented(obj, path, ref): 

730 with self.assertRaises(NotImplementedError): 

731 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

732 

733 if mode in self.ingestTransferModes: 

734 self.runIngestTest(failInputDoesNotExist) 

735 self.runIngestTest(succeed, expectOutput=(mode != "move")) 

736 else: 

737 self.runIngestTest(failNotImplemented) 

738 

739 def testIngestSymlinkOfSymlink(self): 

740 """Special test for symlink to a symlink ingest""" 

741 metrics, ref = self._prepareIngestTest() 

742 # The aim of this test is to create a dataset on disk, then 

743 # create a symlink to it and finally ingest the symlink such that 

744 # the symlink in the datastore points to the original dataset. 

745 for mode in ("symlink", "relsymlink"): 

746 if mode not in self.ingestTransferModes: 

747 continue 

748 

749 print(f"Trying mode {mode}") 

750 with lsst.utils.tests.getTempFilePath(".yaml") as realpath: 

751 with open(realpath, "w") as fd: 

752 yaml.dump(metrics._asdict(), stream=fd) 

753 with lsst.utils.tests.getTempFilePath(".yaml") as sympath: 

754 os.symlink(os.path.abspath(realpath), sympath) 

755 

756 datastore = self.makeDatastore() 

757 datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode) 

758 

759 uri = datastore.getURI(ref) 

760 self.assertTrue(uri.isLocal, f"Check {uri.scheme}") 

761 self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink") 

762 

763 linkTarget = os.readlink(uri.ospath) 

764 if mode == "relsymlink": 

765 self.assertFalse(os.path.isabs(linkTarget)) 

766 else: 

767 self.assertEqual(linkTarget, os.path.abspath(realpath)) 

768 

769 # Check that we can get the dataset back regardless of mode 

770 metric2 = datastore.get(ref) 

771 self.assertEqual(metric2, metrics) 

772 

773 # Cleanup the file for next time round loop 

774 # since it will get the same file name in store 

775 datastore.remove(ref) 

776 

777 

778class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase): 

779 """PosixDatastore specialization""" 

780 

781 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

782 uriScheme = "file" 

783 canIngestNoTransferAuto = True 

784 ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto") 

785 isEphemeral = False 

786 rootKeys = ("root",) 

787 validationCanFail = True 

788 

789 def setUp(self): 

790 # Override the working directory before calling the base class 

791 self.root = tempfile.mkdtemp(dir=TESTDIR) 

792 super().setUp() 

793 

794 

795class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase): 

796 """Posix datastore tests but with checksums disabled.""" 

797 

798 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml") 

799 

800 def testChecksum(self): 

801 """Ensure that checksums have not been calculated.""" 

802 

803 datastore = self.makeDatastore() 

804 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

805 dimensions = self.universe.extract(("visit", "physical_filter")) 

806 metrics = makeExampleMetrics() 

807 

808 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"} 

809 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

810 

811 # Configuration should have disabled checksum calculation 

812 datastore.put(metrics, ref) 

813 infos = datastore.getStoredItemsInfo(ref) 

814 self.assertIsNone(infos[0].checksum) 

815 

816 # Remove put back but with checksums enabled explicitly 

817 datastore.remove(ref) 

818 datastore.useChecksum = True 

819 datastore.put(metrics, ref) 

820 

821 infos = datastore.getStoredItemsInfo(ref) 

822 self.assertIsNotNone(infos[0].checksum) 

823 

824 

825class TrashDatastoreTestCase(PosixDatastoreTestCase): 

826 """Restrict trash test to FileDatastore.""" 

827 

828 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

829 

830 def testTrash(self): 

831 datastore, *refs = self.prepDeleteTest(n_refs=10) 

832 

833 # Trash one of them. 

834 ref = refs.pop() 

835 uri = datastore.getURI(ref) 

836 datastore.trash(ref) 

837 self.assertTrue(uri.exists(), uri) # Not deleted yet 

838 datastore.emptyTrash() 

839 self.assertFalse(uri.exists(), uri) 

840 

841 # Trash it again should be fine. 

842 datastore.trash(ref) 

843 

844 # Trash multiple items at once. 

845 subset = [refs.pop(), refs.pop()] 

846 datastore.trash(subset) 

847 datastore.emptyTrash() 

848 

849 # Remove a record and trash should do nothing. 

850 # This is execution butler scenario. 

851 ref = refs.pop() 

852 uri = datastore.getURI(ref) 

853 datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

854 self.assertTrue(uri.exists()) 

855 datastore.trash(ref) 

856 datastore.emptyTrash() 

857 self.assertTrue(uri.exists()) 

858 

859 # Switch on trust and it should delete the file. 

860 datastore.trustGetRequest = True 

861 datastore.trash([ref]) 

862 self.assertFalse(uri.exists()) 

863 

864 # Remove multiples at once in trust mode. 

865 subset = [refs.pop() for i in range(3)] 

866 datastore.trash(subset) 

867 datastore.trash(refs.pop()) # Check that a single ref can trash 

868 

869 

870class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase): 

871 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

872 

873 def setUp(self): 

874 # Override the working directory before calling the base class 

875 self.root = tempfile.mkdtemp(dir=TESTDIR) 

876 super().setUp() 

877 

878 def testCleanup(self): 

879 """Test that a failed formatter write does cleanup a partial file.""" 

880 metrics = makeExampleMetrics() 

881 datastore = self.makeDatastore() 

882 

883 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

884 

885 dimensions = self.universe.extract(("visit", "physical_filter")) 

886 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} 

887 

888 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

889 

890 # Determine where the file will end up (we assume Formatters use 

891 # the same file extension) 

892 expectedUri = datastore.getURI(ref, predict=True) 

893 self.assertEqual(expectedUri.fragment, "predicted") 

894 

895 self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}") 

896 

897 # Try formatter that fails and formatter that fails and leaves 

898 # a file behind 

899 for formatter in (BadWriteFormatter, BadNoWriteFormatter): 

900 with self.subTest(formatter=formatter): 

901 

902 # Monkey patch the formatter 

903 datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True) 

904 

905 # Try to put the dataset, it should fail 

906 with self.assertRaises(Exception): 

907 datastore.put(metrics, ref) 

908 

909 # Check that there is no file on disk 

910 self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}") 

911 

912 # Check that there is a directory 

913 dir = expectedUri.dirname() 

914 self.assertTrue(dir.exists(), f"Check for existence of directory {dir}") 

915 

916 # Force YamlFormatter and check that this time a file is written 

917 datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True) 

918 datastore.put(metrics, ref) 

919 self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}") 

920 datastore.remove(ref) 

921 self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}") 

922 

923 

924class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase): 

925 """PosixDatastore specialization""" 

926 

927 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml") 

928 uriScheme = "mem" 

929 hasUnsupportedPut = False 

930 ingestTransferModes = () 

931 isEphemeral = True 

932 rootKeys = None 

933 validationCanFail = False 

934 

935 

936class ChainedDatastoreTestCase(PosixDatastoreTestCase): 

937 """ChainedDatastore specialization using a POSIXDatastore""" 

938 

939 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml") 

940 hasUnsupportedPut = False 

941 canIngestNoTransferAuto = False 

942 ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto") 

943 isEphemeral = False 

944 rootKeys = (".datastores.1.root", ".datastores.2.root") 

945 validationCanFail = True 

946 

947 

948class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase): 

949 """ChainedDatastore specialization using all InMemoryDatastore""" 

950 

951 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml") 

952 validationCanFail = False 

953 

954 

955class DatastoreConstraintsTests(DatastoreTestsBase): 

956 """Basic tests of constraints model of Datastores.""" 

957 

958 def testConstraints(self): 

959 """Test constraints model. Assumes that each test class has the 

960 same constraints.""" 

961 metrics = makeExampleMetrics() 

962 datastore = self.makeDatastore() 

963 

964 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

965 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

966 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

967 dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"} 

968 

969 # Write empty file suitable for ingest check (JSON and YAML variants) 

970 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

971 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

972 for datasetTypeName, sc, accepted in ( 

973 ("metric", sc1, True), 

974 ("metric2", sc1, False), 

975 ("metric33", sc1, True), 

976 ("metric2", sc2, True), 

977 ): 

978 # Choose different temp file depending on StorageClass 

979 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

980 

981 with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name): 

982 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False) 

983 if accepted: 

984 datastore.put(metrics, ref) 

985 self.assertTrue(datastore.exists(ref)) 

986 datastore.remove(ref) 

987 

988 # Try ingest 

989 if self.canIngest: 

990 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

991 self.assertTrue(datastore.exists(ref)) 

992 datastore.remove(ref) 

993 else: 

994 with self.assertRaises(DatasetTypeNotSupportedError): 

995 datastore.put(metrics, ref) 

996 self.assertFalse(datastore.exists(ref)) 

997 

998 # Again with ingest 

999 if self.canIngest: 

1000 with self.assertRaises(DatasetTypeNotSupportedError): 

1001 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1002 self.assertFalse(datastore.exists(ref)) 

1003 

1004 

1005class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1006 """PosixDatastore specialization""" 

1007 

1008 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml") 

1009 canIngest = True 

1010 

1011 def setUp(self): 

1012 # Override the working directory before calling the base class 

1013 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1014 super().setUp() 

1015 

1016 

1017class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1018 """InMemoryDatastore specialization""" 

1019 

1020 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml") 

1021 canIngest = False 

1022 

1023 

1024class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase): 

1025 """ChainedDatastore specialization using a POSIXDatastore and constraints 

1026 at the ChainedDatstore""" 

1027 

1028 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1029 

1030 

1031class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1032 """ChainedDatastore specialization using a POSIXDatastore""" 

1033 

1034 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1035 

1036 

1037class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1038 """ChainedDatastore specialization using all InMemoryDatastore""" 

1039 

1040 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1041 canIngest = False 

1042 

1043 

1044class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1045 """Test that a chained datastore can control constraints per-datastore 

1046 even if child datastore would accept.""" 

1047 

1048 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1049 

1050 def setUp(self): 

1051 # Override the working directory before calling the base class 

1052 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1053 super().setUp() 

1054 

1055 def testConstraints(self): 

1056 """Test chained datastore constraints model.""" 

1057 metrics = makeExampleMetrics() 

1058 datastore = self.makeDatastore() 

1059 

1060 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1061 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1062 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1063 dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"} 

1064 dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"} 

1065 

1066 # Write empty file suitable for ingest check (JSON and YAML variants) 

1067 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1068 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1069 

1070 for typeName, dataId, sc, accept, ingest in ( 

1071 ("metric", dataId1, sc1, (False, True, False), True), 

1072 ("metric2", dataId1, sc1, (False, False, False), False), 

1073 ("metric2", dataId2, sc1, (True, False, False), False), 

1074 ("metric33", dataId2, sc2, (True, True, False), True), 

1075 ("metric2", dataId1, sc2, (False, True, False), True), 

1076 ): 

1077 

1078 # Choose different temp file depending on StorageClass 

1079 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1080 

1081 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1082 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False) 

1083 if any(accept): 

1084 datastore.put(metrics, ref) 

1085 self.assertTrue(datastore.exists(ref)) 

1086 

1087 # Check each datastore inside the chained datastore 

1088 for childDatastore, expected in zip(datastore.datastores, accept): 

1089 self.assertEqual( 

1090 childDatastore.exists(ref), 

1091 expected, 

1092 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1093 ) 

1094 

1095 datastore.remove(ref) 

1096 

1097 # Check that ingest works 

1098 if ingest: 

1099 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1100 self.assertTrue(datastore.exists(ref)) 

1101 

1102 # Check each datastore inside the chained datastore 

1103 for childDatastore, expected in zip(datastore.datastores, accept): 

1104 # Ephemeral datastores means InMemory at the moment 

1105 # and that does not accept ingest of files. 

1106 if childDatastore.isEphemeral: 

1107 expected = False 

1108 self.assertEqual( 

1109 childDatastore.exists(ref), 

1110 expected, 

1111 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1112 ) 

1113 

1114 datastore.remove(ref) 

1115 else: 

1116 with self.assertRaises(DatasetTypeNotSupportedError): 

1117 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1118 

1119 else: 

1120 with self.assertRaises(DatasetTypeNotSupportedError): 

1121 datastore.put(metrics, ref) 

1122 self.assertFalse(datastore.exists(ref)) 

1123 

1124 # Again with ingest 

1125 with self.assertRaises(DatasetTypeNotSupportedError): 

1126 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1127 self.assertFalse(datastore.exists(ref)) 

1128 

1129 

1130class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1131 """Tests for datastore caching infrastructure.""" 

1132 

1133 @classmethod 

1134 def setUpClass(cls): 

1135 cls.storageClassFactory = StorageClassFactory() 

1136 cls.universe = DimensionUniverse() 

1137 

1138 # Ensure that we load the test storage class definitions. 

1139 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1140 cls.storageClassFactory.addFromConfig(scConfigFile) 

1141 

1142 def setUp(self): 

1143 self.id = 0 

1144 

1145 # Create a root that we can use for caching tests. 

1146 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1147 

1148 # Create some test dataset refs and associated test files 

1149 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1150 dimensions = self.universe.extract(("visit", "physical_filter")) 

1151 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"} 

1152 

1153 # Create list of refs and list of temporary files 

1154 n_datasets = 10 

1155 self.refs = [ 

1156 self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False) 

1157 for n in range(n_datasets) 

1158 ] 

1159 

1160 root_uri = ResourcePath(self.root, forceDirectory=True) 

1161 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1162 

1163 # Create test files. 

1164 for uri in self.files: 

1165 uri.write(b"0123456789") 

1166 

1167 # Create some composite refs with component files. 

1168 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1169 self.composite_refs = [ 

1170 self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3) 

1171 ] 

1172 self.comp_files = [] 

1173 self.comp_refs = [] 

1174 for n, ref in enumerate(self.composite_refs): 

1175 component_refs = [] 

1176 component_files = [] 

1177 for component in sc.components: 

1178 component_ref = ref.makeComponentRef(component) 

1179 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1180 component_refs.append(component_ref) 

1181 component_files.append(file) 

1182 file.write(b"9876543210") 

1183 

1184 self.comp_files.append(component_files) 

1185 self.comp_refs.append(component_refs) 

1186 

1187 def tearDown(self): 

1188 if self.root is not None and os.path.exists(self.root): 

1189 shutil.rmtree(self.root, ignore_errors=True) 

1190 

1191 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1192 config = Config.fromYaml(config_str) 

1193 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1194 

1195 def testNoCacheDir(self): 

1196 config_str = """ 

1197cached: 

1198 root: null 

1199 cacheable: 

1200 metric0: true 

1201 """ 

1202 cache_manager = self._make_cache_manager(config_str) 

1203 

1204 # Look inside to check we don't have a cache directory 

1205 self.assertIsNone(cache_manager._cache_directory) 

1206 

1207 self.assertCache(cache_manager) 

1208 

1209 # Test that the cache directory is marked temporary 

1210 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1211 

1212 def testNoCacheDirReversed(self): 

1213 """Use default caching status and metric1 to false""" 

1214 config_str = """ 

1215cached: 

1216 root: null 

1217 default: true 

1218 cacheable: 

1219 metric1: false 

1220 """ 

1221 cache_manager = self._make_cache_manager(config_str) 

1222 

1223 self.assertCache(cache_manager) 

1224 

1225 def testExplicitCacheDir(self): 

1226 config_str = f""" 

1227cached: 

1228 root: '{self.root}' 

1229 cacheable: 

1230 metric0: true 

1231 """ 

1232 cache_manager = self._make_cache_manager(config_str) 

1233 

1234 # Look inside to check we do have a cache directory. 

1235 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1236 

1237 self.assertCache(cache_manager) 

1238 

1239 # Test that the cache directory is not marked temporary 

1240 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1241 

1242 def assertCache(self, cache_manager): 

1243 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1244 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1245 

1246 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1247 self.assertIsInstance(uri, ResourcePath) 

1248 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1249 

1250 # Check presence in cache using ref and then using file extension. 

1251 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1252 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1253 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1254 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1255 

1256 # Cached file should no longer exist but uncached file should be 

1257 # unaffected. 

1258 self.assertFalse(self.files[0].exists()) 

1259 self.assertTrue(self.files[1].exists()) 

1260 

1261 # Should find this file and it should be within the cache directory. 

1262 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1263 self.assertTrue(found.exists()) 

1264 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1265 

1266 # Should not be able to find these in cache 

1267 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1268 self.assertIsNone(found) 

1269 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1270 self.assertIsNone(found) 

1271 

1272 def testNoCache(self): 

1273 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1274 for uri, ref in zip(self.files, self.refs): 

1275 self.assertFalse(cache_manager.should_be_cached(ref)) 

1276 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1277 self.assertFalse(cache_manager.known_to_cache(ref)) 

1278 with cache_manager.find_in_cache(ref, ".txt") as found: 

1279 self.assertIsNone(found, msg=f"{cache_manager}") 

1280 

1281 def _expiration_config(self, mode: str, threshold: int) -> str: 

1282 return f""" 

1283cached: 

1284 default: true 

1285 expiry: 

1286 mode: {mode} 

1287 threshold: {threshold} 

1288 cacheable: 

1289 unused: true 

1290 """ 

1291 

1292 def testCacheExpiryFiles(self): 

1293 threshold = 2 # Keep at least 2 files. 

1294 mode = "files" 

1295 config_str = self._expiration_config(mode, threshold) 

1296 

1297 cache_manager = self._make_cache_manager(config_str) 

1298 

1299 # Check that an empty cache returns unknown for arbitrary ref 

1300 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1301 

1302 # Should end with datasets: 2, 3, 4 

1303 self.assertExpiration(cache_manager, 5, threshold + 1) 

1304 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1305 

1306 # Check that we will not expire a file that is actively in use. 

1307 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1308 self.assertIsNotNone(found) 

1309 

1310 # Trigger cache expiration that should remove the file 

1311 # we just retrieved. Should now have: 3, 4, 5 

1312 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1313 self.assertIsNotNone(cached) 

1314 

1315 # Cache should still report the standard file count. 

1316 self.assertEqual(cache_manager.file_count, threshold + 1) 

1317 

1318 # Add additional entry to cache. 

1319 # Should now have 4, 5, 6 

1320 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1321 self.assertIsNotNone(cached) 

1322 

1323 # Is the file still there? 

1324 self.assertTrue(found.exists()) 

1325 

1326 # Can we read it? 

1327 data = found.read() 

1328 self.assertGreater(len(data), 0) 

1329 

1330 # Outside context the file should no longer exist. 

1331 self.assertFalse(found.exists()) 

1332 

1333 # File count should not have changed. 

1334 self.assertEqual(cache_manager.file_count, threshold + 1) 

1335 

1336 # Dataset 2 was in the exempt directory but because hardlinks 

1337 # are used it was deleted from the main cache during cache expiry 

1338 # above and so should no longer be found. 

1339 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1340 self.assertIsNone(found) 

1341 

1342 # And the one stored after it is also gone. 

1343 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1344 self.assertIsNone(found) 

1345 

1346 # But dataset 4 is present. 

1347 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1348 self.assertIsNotNone(found) 

1349 

1350 # Adding a new dataset to the cache should now delete it. 

1351 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1352 

1353 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1354 self.assertIsNone(found) 

1355 

1356 def testCacheExpiryDatasets(self): 

1357 threshold = 2 # Keep 2 datasets. 

1358 mode = "datasets" 

1359 config_str = self._expiration_config(mode, threshold) 

1360 

1361 cache_manager = self._make_cache_manager(config_str) 

1362 self.assertExpiration(cache_manager, 5, threshold + 1) 

1363 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1364 

1365 def testCacheExpiryDatasetsComposite(self): 

1366 threshold = 2 # Keep 2 datasets. 

1367 mode = "datasets" 

1368 config_str = self._expiration_config(mode, threshold) 

1369 

1370 cache_manager = self._make_cache_manager(config_str) 

1371 

1372 n_datasets = 3 

1373 for i in range(n_datasets): 

1374 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1375 cached = cache_manager.move_to_cache(component_file, component_ref) 

1376 self.assertIsNotNone(cached) 

1377 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1378 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1379 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1380 

1381 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1382 

1383 # Write two new non-composite and the number of files should drop. 

1384 self.assertExpiration(cache_manager, 2, 5) 

1385 

1386 def testCacheExpirySize(self): 

1387 threshold = 55 # Each file is 10 bytes 

1388 mode = "size" 

1389 config_str = self._expiration_config(mode, threshold) 

1390 

1391 cache_manager = self._make_cache_manager(config_str) 

1392 self.assertExpiration(cache_manager, 10, 6) 

1393 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1394 

1395 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1396 """Insert the datasets and then check the number retained.""" 

1397 for i in range(n_datasets): 

1398 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1399 self.assertIsNotNone(cached) 

1400 

1401 self.assertEqual(cache_manager.file_count, n_retained) 

1402 

1403 # The oldest file should not be in the cache any more. 

1404 for i in range(n_datasets): 

1405 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1406 if i >= n_datasets - n_retained: 

1407 self.assertIsInstance(found, ResourcePath) 

1408 else: 

1409 self.assertIsNone(found) 

1410 

1411 def testCacheExpiryAge(self): 

1412 threshold = 1 # Expire older than 2 seconds 

1413 mode = "age" 

1414 config_str = self._expiration_config(mode, threshold) 

1415 

1416 cache_manager = self._make_cache_manager(config_str) 

1417 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1418 

1419 # Insert 3 files, then sleep, then insert more. 

1420 for i in range(2): 

1421 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1422 self.assertIsNotNone(cached) 

1423 time.sleep(2.0) 

1424 for j in range(4): 

1425 i = 2 + j # Continue the counting 

1426 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1427 self.assertIsNotNone(cached) 

1428 

1429 # Only the files written after the sleep should exist. 

1430 self.assertEqual(cache_manager.file_count, 4) 

1431 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1432 self.assertIsNone(found) 

1433 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1434 self.assertIsInstance(found, ResourcePath) 

1435 

1436 

1437if __name__ == "__main__": 1437 ↛ 1438line 1437 didn't jump to line 1438, because the condition on line 1437 was never true

1438 unittest.main()