# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A dict-like class that can be used as a hashable DataId dict.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
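
    # Illustrative usage (not exercised directly here): instances are frozen
    # on construction, so mutation raises unless ``frozen`` is reset first:
    #
    #     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
    #     data_id["visit"] = 53   # raises RuntimeError
    #     data_id.frozen = False
    #     data_id["visit"] = 53   # now allowed, but the hash changes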


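# The three positional arguments below appear to map to MetricsExample's
# summary, output and data attributes (inferred from how the tests read them
# back with getattr); use_none=True exercises a None "data" component.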
def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
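
    # Sketch of intended use (illustrative): from_dict wraps each key in
    # Named so the result can stand in for a frozen DataCoordinate, e.g.
    #
    #     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
    #     hash(data_id)  # stable while the contents are unchanged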


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

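        # Keep the storage class from the final loop iteration; the
        # "these should raise" checks below reuse it as ``storageClass``.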
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("Datastore does not support trustGetRequest.")

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

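                # set_retrieve_dataset_type_method appears to give the
                # datastore a way to recover the dataset type as originally
                # stored, so a trusted get() with an overridden storage class
                # can still locate the underlying file and convert it.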
                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            self.skipTest("In-memory datastore does not disassemble.")

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
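
    # prepDeleteTest returns the datastore followed by the refs unpacked,
    # so callers can write, for example:
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)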

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

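        # transfer() pulls the dataset out of the given source datastore and
        # stores it in the datastore it is called on (here: input -> output).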
        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
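        # The for/else below relies on Python's loop-else semantics: the else
        # clause (and hence the SkipTest) runs only if the loop never breaks,
        # i.e. every constituent datastore is an InMemoryDatastore.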
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = tuple(["a", "b", 1])
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

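        # The atomic write is observed indirectly: lsst.resources emits a
        # DEBUG "transfer=move" message for the final rename, which is
        # captured with assertLogs below.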
        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
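        # Each case below is (datasetTypeName, storage class, whether the
        # datastore's constraints configuration is expected to accept a put).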
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

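        # In each case below, "accept" holds one boolean per child datastore
        # in the chain and "ingest" says whether the chain as a whole should
        # accept an ingest of the file.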
        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

1472 

1473 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1474 config = Config.fromYaml(config_str) 

1475 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1476 

1477 def testNoCacheDir(self): 

1478 config_str = """ 

1479cached: 

1480 root: null 

1481 cacheable: 

1482 metric0: true 

1483 """ 

1484 cache_manager = self._make_cache_manager(config_str) 

1485 

1486 # Look inside to check we don't have a cache directory 

1487 self.assertIsNone(cache_manager._cache_directory) 

1488 

1489 self.assertCache(cache_manager) 

1490 

1491 # Test that the cache directory is marked temporary 

1492 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1493 

1494 def testNoCacheDirReversed(self): 

1495 """Use default caching status and metric1 to false""" 

1496 config_str = """ 

1497cached: 

1498 root: null 

1499 default: true 

1500 cacheable: 

1501 metric1: false 

1502 """ 

1503 cache_manager = self._make_cache_manager(config_str) 

1504 

1505 self.assertCache(cache_manager) 

1506 

1507 def testEnvvarCacheDir(self): 
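# Precedence exercised below: DAF_BUTLER_CACHE_DIRECTORY always wins;

# an explicit config root beats DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET;

# IF_UNSET (or set_fallback_cache_directory_if_unset()) applies only

# when the config defines no root.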

1508 config_str = f""" 

1509cached: 

1510 root: '{self.root}' 

1511 cacheable: 

1512 metric0: true 

1513 """ 

1514 

1515 root = ResourcePath(self.root, forceDirectory=True) 

1516 env_dir = root.join("somewhere", forceDirectory=True) 

1517 elsewhere = root.join("elsewhere", forceDirectory=True) 

1518 

1519 # Environment variable should override the config value. 

1520 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1521 cache_manager = self._make_cache_manager(config_str) 

1522 self.assertEqual(cache_manager.cache_directory, env_dir) 

1523 

1524 # This environment variable should not override the config value. 

1525 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1526 cache_manager = self._make_cache_manager(config_str) 

1527 self.assertEqual(cache_manager.cache_directory, root) 

1528 

1529 # Now use a config with no cache root set. 

1530 config_str = """ 

1531cached: 

1532 root: null 

1533 default: true 

1534 cacheable: 

1535 metric1: false 

1536 """ 

1537 cache_manager = self._make_cache_manager(config_str) 

1538 

1539 # This environment variable should override the config value. 

1540 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1541 cache_manager = self._make_cache_manager(config_str) 

1542 self.assertEqual(cache_manager.cache_directory, env_dir) 

1543 

1544 # If both environment variables are set the main (not IF_UNSET) 

1545 # variable should win. 

1546 with unittest.mock.patch.dict( 

1547 os.environ, 

1548 { 

1549 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1550 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1551 }, 

1552 ): 

1553 cache_manager = self._make_cache_manager(config_str) 

1554 self.assertEqual(cache_manager.cache_directory, env_dir) 

1555 

1556 # Use the API to set the environment variable, making sure that the 

1557 # variable is reset on exit. 

1558 with unittest.mock.patch.dict( 

1559 os.environ, 

1560 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1561 ): 

1562 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1563 self.assertTrue(defined) 

1564 cache_manager = self._make_cache_manager(config_str) 

1565 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1566 

1567 # Now create the cache manager ahead of time and set the fallback 

1568 # later. 

1569 cache_manager = self._make_cache_manager(config_str) 

1570 self.assertIsNone(cache_manager._cache_directory) 

1571 with unittest.mock.patch.dict( 

1572 os.environ, 

1573 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1574 ): 

1575 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1576 self.assertTrue(defined) 

1577 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1578 

1579 def testExplicitCacheDir(self): 

1580 config_str = f""" 

1581cached: 

1582 root: '{self.root}' 

1583 cacheable: 

1584 metric0: true 

1585 """ 

1586 cache_manager = self._make_cache_manager(config_str) 

1587 

1588 # Look inside to check we do have a cache directory. 

1589 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1590 

1591 self.assertCache(cache_manager) 

1592 

1593 # Test that the cache directory is not marked temporary 

1594 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1595 

1596 def assertCache(self, cache_manager): 

1597 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1598 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1599 

1600 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1601 self.assertIsInstance(uri, ResourcePath) 

1602 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1603 

1604 # Check presence in cache using ref and then using file extension. 

1605 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1606 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1607 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1608 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1609 

1610 # Cached file should no longer exist but uncached file should be 

1611 # unaffected. 

1612 self.assertFalse(self.files[0].exists()) 

1613 self.assertTrue(self.files[1].exists()) 

1614 

1615 # Should find this file and it should be within the cache directory. 

1616 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1617 self.assertTrue(found.exists()) 

1618 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1619 

1620 # Should not be able to find these in cache 

1621 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1622 self.assertIsNone(found) 

1623 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1624 self.assertIsNone(found) 

1625 

1626 def testNoCache(self): 
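# The disabled cache manager honours the same interface as

# DatastoreCacheManager but never stores anything, so every

# call below should report a cache miss.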

1627 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1628 for uri, ref in zip(self.files, self.refs): 

1629 self.assertFalse(cache_manager.should_be_cached(ref)) 

1630 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1631 self.assertFalse(cache_manager.known_to_cache(ref)) 

1632 with cache_manager.find_in_cache(ref, ".txt") as found: 

1633 self.assertIsNone(found, msg=f"{cache_manager}") 

1634 

1635 def _expiration_config(self, mode: str, threshold: int) -> str: 

1636 return f""" 

1637cached: 

1638 default: true 

1639 expiry: 

1640 mode: {mode} 

1641 threshold: {threshold} 

1642 cacheable: 

1643 unused: true 

1644 """ 

1645 
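# For reference, the helper above renders YAML equivalent to the

# following for mode="files", threshold=2 (the values used in

# testCacheExpiryFiles below):

#

# cached:

#   default: true

#   expiry:

#     mode: files

#     threshold: 2

#   cacheable:

#     unused: true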

1646 def testCacheExpiryFiles(self): 

1647 threshold = 2 # Keep at least 2 files. 

1648 mode = "files" 

1649 config_str = self._expiration_config(mode, threshold) 

1650 

1651 cache_manager = self._make_cache_manager(config_str) 

1652 

1653 # Check that an empty cache returns unknown for an arbitrary ref. 

1654 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1655 

1656 # Should end with datasets: 2, 3, 4 

1657 self.assertExpiration(cache_manager, 5, threshold + 1) 

1658 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1659 

1660 # Check that we will not expire a file that is actively in use. 
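# (While a file is held open via find_in_cache, the cache manager

# hardlinks it into an exempt directory, so the copy survives expiry

# of the main cache entry until the context manager exits.)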

1661 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1662 self.assertIsNotNone(found) 

1663 

1664 # Trigger cache expiration that should remove the main-cache entry 

1665 # of the file we just retrieved. Should now have: 3, 4, 5 

1666 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1667 self.assertIsNotNone(cached) 

1668 

1669 # Cache should still report the expected threshold + 1 file count. 

1670 self.assertEqual(cache_manager.file_count, threshold + 1) 

1671 

1672 # Add additional entry to cache. 

1673 # Should now have 4, 5, 6 

1674 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1675 self.assertIsNotNone(cached) 

1676 

1677 # Is the file still there? 

1678 self.assertTrue(found.exists()) 

1679 

1680 # Can we read it? 

1681 data = found.read() 

1682 self.assertGreater(len(data), 0) 

1683 

1684 # Outside context the file should no longer exist. 

1685 self.assertFalse(found.exists()) 

1686 

1687 # File count should not have changed. 

1688 self.assertEqual(cache_manager.file_count, threshold + 1) 

1689 

1690 # Dataset 2 was in the exempt directory, but because hardlinks are 

1691 # used its main cache entry was deleted during the expiry above, 

1692 # so it should no longer be found. 

1693 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1694 self.assertIsNone(found) 

1695 

1696 # And the one stored after it is also gone. 

1697 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1698 self.assertIsNone(found) 

1699 

1700 # But dataset 4 is present. 

1701 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1702 self.assertIsNotNone(found) 

1703 

1704 # Adding a new dataset to the cache should expire the oldest entry. 

1705 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1706 

1707 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1708 self.assertIsNone(found) 

1709 

1710 def testCacheExpiryDatasets(self): 

1711 threshold = 2 # Keep 2 datasets. 

1712 mode = "datasets" 

1713 config_str = self._expiration_config(mode, threshold) 

1714 

1715 cache_manager = self._make_cache_manager(config_str) 

1716 self.assertExpiration(cache_manager, 5, threshold + 1) 

1717 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1718 

1719 def testCacheExpiryDatasetsComposite(self): 

1720 threshold = 2 # Keep 2 datasets. 

1721 mode = "datasets" 

1722 config_str = self._expiration_config(mode, threshold) 

1723 

1724 cache_manager = self._make_cache_manager(config_str) 

1725 

1726 n_datasets = 3 

1727 for i in range(n_datasets): 

1728 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1729 cached = cache_manager.move_to_cache(component_file, component_ref) 

1730 self.assertIsNotNone(cached) 

1731 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1732 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1733 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1734 

1735 self.assertEqual(cache_manager.file_count, 6) # 2 retained datasets, each with 3 component files 

1736 

1737 # Write two new non-composite datasets; the number of files should drop. 

1738 self.assertExpiration(cache_manager, 2, 5) 

1739 

1740 def testCacheExpirySize(self): 

1741 threshold = 55 # Each file is 10 bytes, so at most 5 old files fit under the threshold. 

1742 mode = "size" 

1743 config_str = self._expiration_config(mode, threshold) 

1744 

1745 cache_manager = self._make_cache_manager(config_str) 

1746 self.assertExpiration(cache_manager, 10, 6) 

1747 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1748 

1749 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1750 """Insert the datasets and then check the number retained.""" 

1751 for i in range(n_datasets): 

1752 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1753 self.assertIsNotNone(cached) 

1754 

1755 self.assertEqual(cache_manager.file_count, n_retained) 

1756 

1757 # The oldest files should no longer be in the cache. 

1758 for i in range(n_datasets): 

1759 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1760 if i >= n_datasets - n_retained: 

1761 self.assertIsInstance(found, ResourcePath) 

1762 else: 

1763 self.assertIsNone(found) 

1764 

1765 def testCacheExpiryAge(self): 

1766 threshold = 1 # Expire files older than 1 second. 

1767 mode = "age" 

1768 config_str = self._expiration_config(mode, threshold) 

1769 

1770 cache_manager = self._make_cache_manager(config_str) 

1771 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1772 

1773 # Insert 2 files, then sleep, then insert 4 more. 

1774 for i in range(2): 

1775 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1776 self.assertIsNotNone(cached) 

1777 time.sleep(2.0) 

1778 for j in range(4): 

1779 i = 2 + j # Continue the counting 

1780 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1781 self.assertIsNotNone(cached) 

1782 
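# By this point the two pre-sleep files are more than threshold=1

# second old, so the expiry triggered by the later inserts has

# removed them.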

1783 # Only the files written after the sleep should exist. 

1784 self.assertEqual(cache_manager.file_count, 4) 

1785 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1786 self.assertIsNone(found) 

1787 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1788 self.assertIsInstance(found, ResourcePath) 

1789 

1790 

1791class DatasetRefURIsTestCase(unittest.TestCase): 

1792 """Tests for DatasetRefURIs.""" 

1793 

1794 def testSequenceAccess(self): 

1795 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1796 uris = DatasetRefURIs() 

1797 

1798 self.assertEqual(len(uris), 2) 

1799 self.assertIsNone(uris[0]) 

1800 self.assertEqual(uris[1], {}) 

1801 

1802 primaryURI = ResourcePath("1/2/3") 

1803 componentURI = ResourcePath("a/b/c") 

1804 

1805 # Affirm that DatasetRefURIs does not support MutableSequence item assignment. 

1806 with self.assertRaises(TypeError): 

1807 uris[0] = primaryURI 

1808 with self.assertRaises(TypeError): 

1809 uris[1] = {"foo": componentURI} 

1810 

1811 # But its URIs can be set by property name: 

1812 uris.primaryURI = primaryURI 

1813 uris.componentURIs = {"foo": componentURI} 

1814 self.assertEqual(uris.primaryURI, primaryURI) 

1815 self.assertEqual(uris[0], primaryURI) 

1816 

1817 primary, components = uris 

1818 self.assertEqual(primary, primaryURI) 

1819 self.assertEqual(components, {"foo": componentURI}) 

1820 
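# The tuple behaviour verified above lets call sites unpack the

# return value directly. A minimal sketch, assuming a datastore

# method such as getURIs() that returns DatasetRefURIs:

#

#     primary, components = datastore.getURIs(ref)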

1821 def testRepr(self): 

1822 """Verify __repr__ output.""" 

1823 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")}) 

1824 self.assertEqual( 

1825 repr(uris), 

1826 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), ' 

1827 f"{{'comp': ResourcePath(\"{os.getcwd()}/a/b/c\")}})", 

1828 ) 

1829 

1830 

1831class DataIdForTestTestCase(unittest.TestCase): 

1832 """Tests for the DataIdForTest class.""" 

1833 

1834 def testImmutable(self): 

1835 """Verify that an instance is immutable by default.""" 

1836 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1837 initial_hash = hash(dataId) 

1838 

1839 with self.assertRaises(RuntimeError): 

1840 dataId["instrument"] = "foo" 

1841 

1842 with self.assertRaises(RuntimeError): 

1843 del dataId["instrument"] 

1844 

1845 assert sys.version_info[0] == 3 

1846 if sys.version_info[1] >= 9: 

1847 with self.assertRaises(RuntimeError): 

1848 dataId |= dict(foo="bar") 

1849 

1850 with self.assertRaises(RuntimeError): 

1851 dataId.pop("instrument") 

1852 

1853 with self.assertRaises(RuntimeError): 

1854 dataId.popitem() 

1855 

1856 with self.assertRaises(RuntimeError): 

1857 dataId.update(dict(instrument="foo")) 

1858 

1859 # Verify that the hash value has not changed. 

1860 self.assertEqual(initial_hash, hash(dataId)) 

1861 

1862 def testMutable(self): 

1863 """Verify that an instance can be made mutable (unfrozen).""" 

1864 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1865 initial_hash = hash(dataId) 

1866 dataId.frozen = False 

1867 self.assertEqual(initial_hash, hash(dataId)) 

1868 

1869 dataId["instrument"] = "foo" 

1870 self.assertEqual(dataId["instrument"], "foo") 

1871 self.assertNotEqual(initial_hash, hash(dataId)) 

1872 initial_hash = hash(dataId) 

1873 

1874 del dataId["instrument"] 

1875 self.assertTrue("instrument" not in dataId) 

1876 self.assertNotEqual(initial_hash, hash(dataId)) 

1877 initial_hash = hash(dataId) 

1878 

1879 assert sys.version_info[0] == 3 

1880 if sys.version_info[1] >= 9: 

1881 dataId |= dict(foo="bar") 

1882 self.assertEqual(dataId["foo"], "bar") 

1883 self.assertNotEqual(initial_hash, hash(dataId)) 

1884 initial_hash = hash(dataId) 

1885 

1886 dataId.pop("visit") 

1887 self.assertTrue("visit" not in dataId) 

1888 self.assertNotEqual(initial_hash, hash(dataId)) 

1889 initial_hash = hash(dataId) 

1890 

1891 dataId.popitem() 

1892 self.assertTrue("physical_filter" not in dataId) 

1893 self.assertNotEqual(initial_hash, hash(dataId)) 

1894 initial_hash = hash(dataId) 

1895 

1896 dataId.update(dict(instrument="foo")) 

1897 self.assertEqual(dataId["instrument"], "foo") 

1898 self.assertNotEqual(initial_hash, hash(dataId)) 

1899 initial_hash = hash(dataId) 

1900 

1901 

1902if __name__ == "__main__": 

1903 unittest.main()