Coverage for tests/test_datastore.py: 12% (1053 statements)
coverage.py v6.5.0, created at 2023-01-07 10:08 +0000


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
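"""Tests for the Datastore implementations provided with daf_butler."""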


from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used as a DataId dict.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
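# Illustrative usage of DataIdForTest (not itself a test): mutation after
# construction raises, while hashing works on the frozen contents.
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)          # hash of the underlying dict contents
#     data_id["visit"] = 53  # raises RuntimeError("DataIdForTest is frozen.")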



def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
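# Illustrative usage of FakeDataCoordinate (not itself a test): ``from_dict``
# wraps each key in ``Named`` and freezes the mapping, so the result is
# hashable, mirroring how the delete/export tests below construct data IDs.
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})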



class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)


            storageClass = sc  # Remember the last storage class for use after the loop.

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)


    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip the test if the datastore does not define the attribute.
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("Datastore does not support trustGetRequest")

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the internal record so the datastore no longer knows
            # about the dataset.
            datastore.removeStoredItemInfo(ref)

            # With trust still disabled, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)


    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            self.skipTest("in-memory datastore does not disassemble")

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)


    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
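    # ``prepDeleteTest`` returns the datastore followed by the refs it created,
    # so callers unpack it as ``datastore, ref = self.prepDeleteTest()`` or, for
    # several refs, ``datastore, *refs = self.prepDeleteTest(n_refs=10)``.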


    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())
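    # Note the distinction exercised above: ``remove`` deletes the stored
    # artifact immediately, whereas ``forget`` only drops the datastore's
    # knowledge of it and leaves the file in place (hence the final
    # ``uri.exists()`` check). ``trash``/``emptyTrash`` (see
    # TrashDatastoreTestCase below) instead defer the deletion.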


    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)


    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)


    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)


    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)


    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
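        # (The for/else below raises SkipTest only when the loop never hits
        # ``break``, i.e. when every child datastore is an in-memory one.)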

        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs


    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)


    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = tuple(["a", "b", 1])
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))



class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)



class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove, then put back with checksums explicitly enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)



class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed



class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")



class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False



class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False



class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))



class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False



class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore,
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the moment,
                            # and those do not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))



class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
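    # For reference, the cache configurations exercised below all take this
    # shape (the annotations are explanatory assumptions, not part of the
    # tests themselves):
    #
    #     cached:
    #       root: null        # null: a temporary directory is created on demand
    #       default: true     # caching status for dataset types not listed
    #       cacheable:
    #         metric0: true   # per-dataset-type override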


1443 def testNoCacheDir(self): 

1444 config_str = """ 

1445cached: 

1446 root: null 

1447 cacheable: 

1448 metric0: true 

1449 """ 

1450 cache_manager = self._make_cache_manager(config_str) 

1451 

1452 # Look inside to check we don't have a cache directory 

1453 self.assertIsNone(cache_manager._cache_directory) 

1454 

1455 self.assertCache(cache_manager) 

1456 

1457 # Test that the cache directory is marked temporary 

1458 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1459 

1460 def testNoCacheDirReversed(self): 

1461 """Use default caching status and metric1 to false""" 

1462 config_str = """ 

1463cached: 

1464 root: null 

1465 default: true 

1466 cacheable: 

1467 metric1: false 

1468 """ 

1469 cache_manager = self._make_cache_manager(config_str) 

1470 

1471 self.assertCache(cache_manager) 

1472 

1473 def testEnvvarCacheDir(self): 

1474 config_str = f""" 

1475cached: 

1476 root: '{self.root}' 

1477 cacheable: 

1478 metric0: true 

1479 """ 

1480 

1481 root = ResourcePath(self.root, forceDirectory=True) 

1482 env_dir = root.join("somewhere", forceDirectory=True) 

1483 elsewhere = root.join("elsewhere", forceDirectory=True) 

1484 

1485 # Environment variable should override the config value. 

1486 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1487 cache_manager = self._make_cache_manager(config_str) 

1488 self.assertEqual(cache_manager.cache_directory, env_dir) 

1489 

1490 # This environment variable should not override the config value. 

1491 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1492 cache_manager = self._make_cache_manager(config_str) 

1493 self.assertEqual(cache_manager.cache_directory, root) 

1494 

1496 # Now use a configuration with no cache root set.

1496 config_str = """ 

1497cached: 

1498 root: null 

1499 default: true 

1500 cacheable: 

1501 metric1: false 

1502 """ 

1503 cache_manager = self._make_cache_manager(config_str) 

1504 

1505 # With no root in the config, this environment variable should now take effect.

1506 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1507 cache_manager = self._make_cache_manager(config_str) 

1508 self.assertEqual(cache_manager.cache_directory, env_dir) 

1509 

1510 # If both environment variables are set the main (not IF_UNSET) 

1511 # variable should win. 

1512 with unittest.mock.patch.dict( 

1513 os.environ, 

1514 { 

1515 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1516 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1517 }, 

1518 ): 

1519 cache_manager = self._make_cache_manager(config_str) 

1520 self.assertEqual(cache_manager.cache_directory, env_dir) 

1521 

1522 # Use the API to set the environment variable, making sure that the 

1523 # variable is reset on exit. 

1524 with unittest.mock.patch.dict( 

1525 os.environ, 

1526 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1527 ): 

1528 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1529 self.assertTrue(defined) 

1530 cache_manager = self._make_cache_manager(config_str) 

1531 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1532 

1533 # Now create the cache manager ahead of time and set the fallback 

1534 # later. 

1535 cache_manager = self._make_cache_manager(config_str) 

1536 self.assertIsNone(cache_manager._cache_directory) 

1537 with unittest.mock.patch.dict( 

1538 os.environ, 

1539 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1540 ): 

1541 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1542 self.assertTrue(defined) 

1543 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 
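# Hedged summary of the precedence exercised by this test (a sketch, not
# the library's actual resolution code): DAF_BUTLER_CACHE_DIRECTORY always
# wins, an explicit config root comes next, and the IF_UNSET variable is
# only consulted when neither of the first two is set.
def _sketch_resolve_cache_root(config_root=None):
    if env := os.environ.get("DAF_BUTLER_CACHE_DIRECTORY"):
        return env  # Main override beats everything.
    if config_root is not None:
        return config_root  # Explicit config value beats the fallback.
    return os.environ.get("DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET")  # May be None.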

1544 

1545 def testExplicitCacheDir(self): 

1546 config_str = f""" 

1547cached: 

1548 root: '{self.root}' 

1549 cacheable: 

1550 metric0: true 

1551 """ 

1552 cache_manager = self._make_cache_manager(config_str) 

1553 

1554 # Look inside to check we do have a cache directory. 

1555 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1556 

1557 self.assertCache(cache_manager) 

1558 

1559 # Test that the cache directory is not marked temporary 

1560 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1561 

1562 def assertCache(self, cache_manager): 

1563 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1564 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1565 

1566 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1567 self.assertIsInstance(uri, ResourcePath) 

1568 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1569 

1570 # Check presence in cache using ref and then using file extension. 

1571 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1572 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1573 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1574 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1575 

1576 # Cached file should no longer exist but uncached file should be 

1577 # unaffected. 

1578 self.assertFalse(self.files[0].exists()) 

1579 self.assertTrue(self.files[1].exists()) 

1580 

1581 # Should find this file and it should be within the cache directory. 

1582 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1583 self.assertTrue(found.exists()) 

1584 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1585 

1586 # Should not be able to find these in cache 

1587 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1588 self.assertIsNone(found) 

1589 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1590 self.assertIsNone(found) 
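# find_in_cache is used as a context manager throughout: the returned file
# is only guaranteed to exist inside the "with" block (testCacheExpiryFiles
# below shows it disappearing after exit). A generic sketch of that
# pin-while-reading pattern, assuming a POSIX filesystem and nothing about
# the real cache internals:
def _sketch_pinned_read(path):
    from contextlib import contextmanager

    @contextmanager
    def pinned(p):
        pin = p + ".pin"
        os.link(p, pin)  # An extra hard link keeps the bytes alive.
        try:
            yield pin
        finally:
            os.unlink(pin)  # Release the pin; data is freed once unreferenced.

    with pinned(path) as safe:
        with open(safe, "rb") as fh:
            return fh.read()  # Safe even if `path` is expired meanwhile.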

1591 

1592 def testNoCache(self): 

1593 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1594 for uri, ref in zip(self.files, self.refs): 

1595 self.assertFalse(cache_manager.should_be_cached(ref)) 

1596 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1597 self.assertFalse(cache_manager.known_to_cache(ref)) 

1598 with cache_manager.find_in_cache(ref, ".txt") as found: 

1599 self.assertIsNone(found, msg=f"{cache_manager}") 
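# The disabled manager behaves as a null object: every query above answers
# "not cached" and every mutation is a no-op, so datastore code never has
# to special-case a configuration with caching turned off.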

1600 

1601 def _expiration_config(self, mode: str, threshold: int) -> str: 

1602 return f""" 

1603cached: 

1604 default: true 

1605 expiry: 

1606 mode: {mode} 

1607 threshold: {threshold} 

1608 cacheable: 

1609 unused: true 

1610 """ 

1611 

1612 def testCacheExpiryFiles(self): 

1613 threshold = 2 # Keep at least 2 files. 

1614 mode = "files" 

1615 config_str = self._expiration_config(mode, threshold) 

1616 

1617 cache_manager = self._make_cache_manager(config_str) 

1618 

1619 # Check that an empty cache reports an arbitrary ref as unknown.

1620 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1621 

1622 # Should end with datasets: 2, 3, 4 

1623 self.assertExpiration(cache_manager, 5, threshold + 1) 

1624 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1625 

1626 # Check that we will not expire a file that is actively in use. 

1627 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1628 self.assertIsNotNone(found) 

1629 

1630 # Trigger cache expiration that should remove the file 

1631 # we just retrieved. Should now have: 3, 4, 5 

1632 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1633 self.assertIsNotNone(cached) 

1634 

1635 # Cache should still report the expected file count.

1636 self.assertEqual(cache_manager.file_count, threshold + 1) 

1637 

1638 # Add additional entry to cache. 

1639 # Should now have 4, 5, 6 

1640 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1641 self.assertIsNotNone(cached) 

1642 

1643 # Is the file still there? 

1644 self.assertTrue(found.exists()) 

1645 

1646 # Can we read it? 

1647 data = found.read() 

1648 self.assertGreater(len(data), 0) 

1649 

1650 # Outside the context manager the file should no longer exist.

1651 self.assertFalse(found.exists()) 

1652 

1653 # File count should not have changed. 

1654 self.assertEqual(cache_manager.file_count, threshold + 1) 

1655 

1656 # Dataset 2 was in the exempt directory but because hardlinks 

1657 # are used it was deleted from the main cache during cache expiry 

1658 # above and so should no longer be found. 

1659 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1660 self.assertIsNone(found) 

1661 

1662 # And the one stored after it is also gone. 

1663 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1664 self.assertIsNone(found) 

1665 

1666 # But dataset 4 is present. 

1667 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1668 self.assertIsNotNone(found) 

1669 

1670 # Adding a new dataset to the cache should now delete the exempted file.

1671 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1672 

1673 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1674 self.assertIsNone(found) 
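# The hard-link behaviour described above can be demonstrated in isolation
# (standard library only, POSIX filesystems): expiry can unlink the main
# cache entry while an "exempt" hard link keeps the bytes alive for the
# holder, after which the dataset is no longer findable in the cache.
def _sketch_hardlink_semantics():
    tmpdir = tempfile.mkdtemp()
    original = os.path.join(tmpdir, "cached.txt")
    exempt = os.path.join(tmpdir, "exempt.txt")
    with open(original, "wb") as fh:
        fh.write(b"0123456789")
    os.link(original, exempt)  # Second name for the same underlying data.
    os.unlink(original)  # Cache expiry removes the main-cache name...
    with open(exempt, "rb") as fh:
        assert fh.read() == b"0123456789"  # ...but the bytes survive.
    shutil.rmtree(tmpdir, ignore_errors=True)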

1675 

1676 def testCacheExpiryDatasets(self): 

1677 threshold = 2 # Keep 2 datasets. 

1678 mode = "datasets" 

1679 config_str = self._expiration_config(mode, threshold) 

1680 

1681 cache_manager = self._make_cache_manager(config_str) 

1682 self.assertExpiration(cache_manager, 5, threshold + 1) 

1683 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1684 

1685 def testCacheExpiryDatasetsComposite(self): 

1686 threshold = 2 # Keep 2 datasets. 

1687 mode = "datasets" 

1688 config_str = self._expiration_config(mode, threshold) 

1689 

1690 cache_manager = self._make_cache_manager(config_str) 

1691 

1692 n_datasets = 3 

1693 for i in range(n_datasets): 

1694 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1695 cached = cache_manager.move_to_cache(component_file, component_ref) 

1696 self.assertIsNotNone(cached) 

1697 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1698 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1699 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1700 

1701 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1702 

1703 # Write two new non-composite datasets and the number of files should drop.

1704 self.assertExpiration(cache_manager, 2, 5) 

1705 

1706 def testCacheExpirySize(self): 

1707 threshold = 55 # Each file is 10 bytes 

1708 mode = "size" 

1709 config_str = self._expiration_config(mode, threshold) 

1710 

1711 cache_manager = self._make_cache_manager(config_str) 

1712 self.assertExpiration(cache_manager, 10, 6) 

1713 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1714 

1715 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1716 """Insert the datasets and then check the number retained.""" 

1717 for i in range(n_datasets): 

1718 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1719 self.assertIsNotNone(cached) 

1720 

1721 self.assertEqual(cache_manager.file_count, n_retained) 

1722 

1723 # The oldest files should no longer be in the cache.

1724 for i in range(n_datasets): 

1725 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1726 if i >= n_datasets - n_retained: 

1727 self.assertIsInstance(found, ResourcePath) 

1728 else: 

1729 self.assertIsNone(found) 
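# Worked example: in testCacheExpiryFiles above, n_datasets=5 and
# n_retained=threshold+1=3, so the loop keeps every i >= 5 - 3 = 2,
# i.e. datasets 2, 3 and 4, matching the "Should end with datasets:
# 2, 3, 4" comment in that test.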

1730 

1731 def testCacheExpiryAge(self): 

1732 threshold = 1 # Expire files older than 1 second.

1733 mode = "age" 

1734 config_str = self._expiration_config(mode, threshold) 

1735 

1736 cache_manager = self._make_cache_manager(config_str) 

1737 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1738 

1739 # Insert 2 files, then sleep, then insert 4 more.

1740 for i in range(2): 

1741 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1742 self.assertIsNotNone(cached) 

1743 time.sleep(2.0) 

1744 for j in range(4): 

1745 i = 2 + j # Continue the counting 

1746 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1747 self.assertIsNotNone(cached) 

1748 

1749 # Only the files written after the sleep should exist. 

1750 self.assertEqual(cache_manager.file_count, 4) 

1751 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1752 self.assertIsNone(found) 

1753 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1754 self.assertIsInstance(found, ResourcePath) 
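# Timeline of this test: files 0-1 are written, then a 2-second pause
# (twice the 1-second threshold), then files 2-5. Inserting the later
# files expires everything older than the threshold, leaving exactly the
# four post-pause files.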

1755 

1756 

1757class DatasetRefURIsTestCase(unittest.TestCase): 

1758 """Tests for DatasetRefURIs.""" 

1759 

1760 def testSequenceAccess(self): 

1761 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1762 uris = DatasetRefURIs() 

1763 

1764 self.assertEqual(len(uris), 2) 

1765 self.assertEqual(uris[0], None) 

1766 self.assertEqual(uris[1], {}) 

1767 

1768 primaryURI = ResourcePath("1/2/3") 

1769 componentURI = ResourcePath("a/b/c") 

1770 

1771 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.

1772 with self.assertRaises(TypeError): 

1773 uris[0] = primaryURI 

1774 with self.assertRaises(TypeError): 

1775 uris[1] = {"foo": componentURI} 

1776 

1777 # But DatasetRefURIs can be set by property name:

1778 uris.primaryURI = primaryURI 

1779 uris.componentURIs = {"foo": componentURI} 

1780 self.assertEqual(uris.primaryURI, primaryURI) 

1781 self.assertEqual(uris[0], primaryURI) 

1782 

1783 primary, components = uris 

1784 self.assertEqual(primary, primaryURI) 

1785 self.assertEqual(components, {"foo": componentURI}) 
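# Tuple-like read access without item assignment can be provided as in the
# following sketch (illustrative only -- not the real DatasetRefURIs
# implementation): collections.abc.Sequence derives iteration, unpacking
# and len() from __getitem__/__len__, and the absence of __setitem__ makes
# "uris[0] = ..." raise TypeError.
def _sketch_two_item_uris():
    from collections.abc import Sequence

    class TwoItemURIs(Sequence):
        def __init__(self, primaryURI=None, componentURIs=None):
            self.primaryURI = primaryURI
            self.componentURIs = {} if componentURIs is None else componentURIs

        def __getitem__(self, index):
            return (self.primaryURI, self.componentURIs)[index]

        def __len__(self):
            return 2

    primary, components = TwoItemURIs()  # Unpacks like a 2-tuple.
    assert primary is None and components == {}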

1786 

1787 def testRepr(self): 

1788 """Verify __repr__ output.""" 

1789 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")}) 

1790 self.assertEqual( 

1791 repr(uris), 

1792 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), ' 

1793 "{'comp': ResourcePath(\"" + os.getcwd() + '/a/b/c")})', 

1794 ) 

1795 

1796 

1797class DataIdForTestTestCase(unittest.TestCase): 

1798 """Tests for the DataIdForTest class.""" 

1799 

1800 def testImmutable(self): 

1801 """Verify that an instance is immutable by default.""" 

1802 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1803 initial_hash = hash(dataId) 

1804 

1805 with self.assertRaises(RuntimeError): 

1806 dataId["instrument"] = "foo" 

1807 

1808 with self.assertRaises(RuntimeError): 

1809 del dataId["instrument"] 

1810 

1811 assert sys.version_info[0] == 3 

1812 if sys.version_info[1] >= 9: 

1813 with self.assertRaises(RuntimeError): 

1814 dataId |= dict(foo="bar") 

1815 

1816 with self.assertRaises(RuntimeError): 

1817 dataId.pop("instrument") 

1818 

1819 with self.assertRaises(RuntimeError): 

1820 dataId.popitem() 

1821 

1822 with self.assertRaises(RuntimeError): 

1823 dataId.update(dict(instrument="foo")) 

1824 

1825 # Verify that the hash value has not changed.

1826 self.assertEqual(initial_hash, hash(dataId)) 

1827 

1828 def testMutable(self): 

1829 """Verify that an instance can be made mutable (unfrozen).""" 

1830 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1831 initial_hash = hash(dataId) 

1832 dataId.frozen = False 

1833 self.assertEqual(initial_hash, hash(dataId)) 

1834 

1835 dataId["instrument"] = "foo" 

1836 self.assertEqual(dataId["instrument"], "foo") 

1837 self.assertNotEqual(initial_hash, hash(dataId)) 

1838 initial_hash = hash(dataId) 

1839 

1840 del dataId["instrument"] 

1841 self.assertTrue("instrument" not in dataId) 

1842 self.assertNotEqual(initial_hash, hash(dataId)) 

1843 initial_hash = hash(dataId) 

1844 

1845 assert sys.version_info[0] == 3 

1846 if sys.version_info[1] >= 9: 

1847 dataId |= dict(foo="bar") 

1848 self.assertEqual(dataId["foo"], "bar") 

1849 self.assertNotEqual(initial_hash, hash(dataId)) 

1850 initial_hash = hash(dataId) 

1851 

1852 dataId.pop("visit") 

1853 self.assertTrue("visit" not in dataId) 

1854 self.assertNotEqual(initial_hash, hash(dataId)) 

1855 initial_hash = hash(dataId) 

1856 

1857 dataId.popitem() 

1858 self.assertTrue("physical_filter" not in dataId) 

1859 self.assertNotEqual(initial_hash, hash(dataId)) 

1860 initial_hash = hash(dataId) 

1861 

1862 dataId.update(dict(instrument="foo")) 

1863 self.assertEqual(dataId["instrument"], "foo") 

1864 self.assertNotEqual(initial_hash, hash(dataId)) 

1865 initial_hash = hash(dataId) 
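# Why frozen-by-default matters: hash-based containers file an entry under
# the key's hash at insertion time, so mutating a key afterwards silently
# breaks lookup. A self-contained illustration using the class under test:
def _sketch_mutated_key_hazard():
    dataId = DataIdForTest({"instrument": "dummy", "visit": 52})
    table = {dataId: "payload"}
    assert table[dataId] == "payload"  # Found under the original hash.
    dataId.frozen = False
    dataId["visit"] = 53  # The hash changes with the contents...
    dataId.frozen = True
    assert dataId not in table  # ...so the entry can no longer be found.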

1866 

1867 

1868 if __name__ == "__main__":  # 1868 ↛ 1869: the condition was never true during the coverage run.

1869 unittest.main()