# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import shutil
import tempfile
import time
import unittest
from dataclasses import dataclass
from itertools import chain

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            # NamedKeyDict keys must provide a .name attribute, which the
            # frozen Named dataclass above supplies.
            new[Named(k)] = v
        return new.freeze()
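    # dict subclasses are not hashable by default; freeze() above returns the
    # now-immutable mapping, and the hash below (over the frozen items) lets
    # instances be used as dict keys or set members like a real
    # DataCoordinate.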

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that might
    otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
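    # Subclasses whose datastore accepts every storage class set this to
    # False, since for them the unsupported-put check in testBasicPutGet
    # can never raise.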

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print(f"Using storageClass: {sc.name}")
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"
            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so that only the file remains.
            datastore.removeStoredItemInfo(ref)

            # With trust still disabled, everything should now break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures caused by file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
            )
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print(f"Using storageClass: {sc.name}")
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(
                    ref.datasetType.componentTypeName(compName),
                    dimensions,
                    components[compName].storageClass,
                    dataId,
                    conform=False,
                )

                print(f"Writing component {compName} with {compRef.datasetType.storageClass.name}")
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
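        # (Unparenthesized tuple unpacking in a return statement requires
        # Python 3.8 or newer.)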

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""

        datastore = self.makeDatastore("test_datastore")

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")
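        # (The ``else`` clause on the loop above runs only when no ``break``
        # occurred, i.e. when every chained datastore name is in-memory.)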

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)

        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.refs), n_refs)
                self.assertEqual(len(list(chain(*record_data.records.values()))), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove, then put back with checksums explicitly enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up any partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and leaves a file behind, and one
        # that fails without writing anything
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric2", sc1, False),
            ("metric33", sc1, True),
            ("metric2", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric2", dataId1, sc1, (False, False, False), False),
            ("metric2", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric2", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")
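        # (10 bytes per file; testCacheExpirySize below assumes this size
        # when choosing its threshold.)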

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use the default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, sleep past the age threshold, then insert more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


if __name__ == "__main__":
    unittest.main()