# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)

class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used as a DataId dict.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
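# A brief illustration (not part of the test suite) of the behavior the class
# above provides: construction freezes the mapping, so mutation raises while
# lookups and hashing keep working.
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     data_id["visit"]       # -> 52
#     hash(data_id)          # derived from str() of the backing dict
#     data_id["visit"] = 53  # raises RuntimeError("DataIdForTest is frozen.")
#
# Setting ``data_id.frozen = False`` re-enables mutation, at the cost of the
# hash changing with the contents.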

def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
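# For reference, the three positional arguments above appear to map to the
# MetricsExample fields ``summary``, ``output`` and ``data`` -- this ordering
# is inferred from the keyword-argument usage in the put/get tests later in
# this file, not from the MetricsExample definition itself -- so:
#
#     metrics = makeExampleMetrics()
#     metrics.data  # -> [563, 234, 456.7, 105, 2054, -1045]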

@dataclass(frozen=True)
class Named:
    name: str

class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
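# A brief illustration (not part of the test suite): from_dict() wraps each
# key in Named because NamedKeyDict keys are expected to expose a ``.name``
# attribute, and freeze() makes the result usable wherever a hashable
# DataCoordinate stand-in is needed.
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
#     hash(data_id)  # works: the frozen items are hashed as a frozenset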

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass
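# A brief illustration (not part of the test suite) of why a dedicated
# exception class helps: an assertRaises(TransactionTestError) check cannot
# be satisfied accidentally by some unrelated failure inside the transaction,
# which a generic RuntimeError could be. The transaction tests below use it
# like this:
#
#     with self.assertRaises(TransactionTestError):
#         with datastore.transaction():
#             datastore.put(metrics, ref)
#             raise TransactionTestError("This should roll back the transaction")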

class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        # Save the final storage class from the loop for use further below.
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=uuid.uuid4())
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in the default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that from now on retrieval depends
            # on whether the datastore trusts the request.
            datastore.removeStoredItemInfo(ref)

            # With trust still disabled, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for a compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self):
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of test failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do an in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs
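    # Note on the for/else idiom in _populate_export_datastore above: the
    # ``else`` branch runs only if the loop finishes without hitting
    # ``break``, i.e. only when every child datastore name starts with
    # "InMemoryDatastore". A minimal standalone sketch (``names`` and
    # ``is_file_datastore`` are hypothetical stand-ins):
    #
    #     for n in names:
    #         if is_file_datastore(n):
    #             break  # found a usable datastore; do not skip
    #     else:
    #         raise unittest.SkipTest("no usable datastore found")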

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Use the same datastore name to import the relative paths.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))

class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed

class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")

class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False

class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False

class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self):
        """Test the constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on the StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))

class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False

class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on the StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the
                            # moment and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create a list of refs and a list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

1474 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1475 config = Config.fromYaml(config_str) 

1476 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1477 

1478 def testNoCacheDir(self): 

1479 config_str = """ 

1480cached: 

1481 root: null 

1482 cacheable: 

1483 metric0: true 

1484 """ 

1485 cache_manager = self._make_cache_manager(config_str) 

1486 

1487 # Look inside to check we don't have a cache directory 

1488 self.assertIsNone(cache_manager._cache_directory) 

1489 

1490 self.assertCache(cache_manager) 

1491 

1492 # Test that the cache directory is marked temporary 

1493 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1494 

1495 def testNoCacheDirReversed(self): 

1496 """Use default caching status and metric1 to false""" 

1497 config_str = """ 

1498cached: 

1499 root: null 

1500 default: true 

1501 cacheable: 

1502 metric1: false 

1503 """ 

1504 cache_manager = self._make_cache_manager(config_str) 

1505 

1506 self.assertCache(cache_manager) 

1507 

1508 def testEnvvarCacheDir(self): 

1509 config_str = f""" 

1510cached: 

1511 root: '{self.root}' 

1512 cacheable: 

1513 metric0: true 

1514 """ 

1515 

1516 root = ResourcePath(self.root, forceDirectory=True) 

1517 env_dir = root.join("somewhere", forceDirectory=True) 

1518 elsewhere = root.join("elsewhere", forceDirectory=True) 

1519 

1520 # Environment variable should override the config value. 

1521 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1522 cache_manager = self._make_cache_manager(config_str) 

1523 self.assertEqual(cache_manager.cache_directory, env_dir) 

1524 

1525 # This environment variable should not override the config value. 

1526 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1527 cache_manager = self._make_cache_manager(config_str) 

1528 self.assertEqual(cache_manager.cache_directory, root) 

1529 

1530 # Now a config with no cache directory set. 

1531 config_str = """ 

1532cached: 

1533 root: null 

1534 default: true 

1535 cacheable: 

1536 metric1: false 

1537 """ 

1538 cache_manager = self._make_cache_manager(config_str) 

1539 

1540 # This environment variable should override the config value. 

1541 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1542 cache_manager = self._make_cache_manager(config_str) 

1543 self.assertEqual(cache_manager.cache_directory, env_dir) 

1544 

1545 # If both environment variables are set, the main (not IF_UNSET) 

1546 # variable should win. 

1547 with unittest.mock.patch.dict( 

1548 os.environ, 

1549 { 

1550 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1551 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1552 }, 

1553 ): 

1554 cache_manager = self._make_cache_manager(config_str) 

1555 self.assertEqual(cache_manager.cache_directory, env_dir) 

1556 

1557 # Use the API to set the environment variable, making sure that the 

1558 # variable is reset on exit. 

1559 with unittest.mock.patch.dict( 

1560 os.environ, 

1561 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1562 ): 

1563 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1564 self.assertTrue(defined) 

1565 cache_manager = self._make_cache_manager(config_str) 

1566 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1567 

1568 # Now create the cache manager ahead of time and set the fallback 

1569 # later. 

1570 cache_manager = self._make_cache_manager(config_str) 

1571 self.assertIsNone(cache_manager._cache_directory) 

1572 with unittest.mock.patch.dict( 

1573 os.environ, 

1574 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1575 ): 

1576 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1577 self.assertTrue(defined) 

1578 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1579 
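# Precedence summary for the lookups tested above: DAF_BUTLER_CACHE_DIRECTORY
# always wins; an explicit config root beats DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET;
# and the IF_UNSET variable (or set_fallback_cache_directory_if_unset) applies
# only when no directory is configured at all. The helper below is a hedged
# sketch of that resolution order, not the real implementation.
@staticmethod
def _resolve_cache_directory_sketch(config_root):
    if env := os.environ.get("DAF_BUTLER_CACHE_DIRECTORY"):
        return env  # Main variable overrides everything.
    if config_root is not None:
        return config_root  # Explicit config root comes next.
    return os.environ.get("DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET")  # Fallback.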

1580 def testExplicitCacheDir(self): 

1581 config_str = f""" 

1582cached: 

1583 root: '{self.root}' 

1584 cacheable: 

1585 metric0: true 

1586 """ 

1587 cache_manager = self._make_cache_manager(config_str) 

1588 

1589 # Look inside to check we do have a cache directory. 

1590 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1591 

1592 self.assertCache(cache_manager) 

1593 

1594 # Test that the cache directory is not marked temporary 

1595 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1596 

1597 def assertCache(self, cache_manager): 

1598 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1599 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1600 

1601 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1602 self.assertIsInstance(uri, ResourcePath) 

1603 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1604 

1605 # Check presence in cache using ref and then using file extension. 

1606 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1607 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1608 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1609 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1610 

1611 # Cached file should no longer exist but uncached file should be 

1612 # unaffected. 

1613 self.assertFalse(self.files[0].exists()) 

1614 self.assertTrue(self.files[1].exists()) 

1615 

1616 # Should find this file and it should be within the cache directory. 

1617 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1618 self.assertTrue(found.exists()) 

1619 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1620 

1621 # Should not be able to find these in cache 

1622 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1623 self.assertIsNone(found) 

1624 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1625 self.assertIsNone(found) 

1626 

1627 def testNoCache(self): 

1628 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1629 for uri, ref in zip(self.files, self.refs): 

1630 self.assertFalse(cache_manager.should_be_cached(ref)) 

1631 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1632 self.assertFalse(cache_manager.known_to_cache(ref)) 

1633 with cache_manager.find_in_cache(ref, ".txt") as found: 

1634 self.assertIsNone(found, msg=f"{cache_manager}") 

1635 

1636 def _expiration_config(self, mode: str, threshold: int) -> str: 

1637 return f""" 

1638cached: 

1639 default: true 

1640 expiry: 

1641 mode: {mode} 

1642 threshold: {threshold} 

1643 cacheable: 

1644 unused: true 

1645 """ 

1646 
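# For example, self._expiration_config("files", 3) renders to:
#
#   cached:
#     default: true
#     expiry:
#       mode: files
#       threshold: 3
#     cacheable:
#       unused: true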

1647 def testCacheExpiryFiles(self): 

1648 threshold = 2 # Keep at least 2 files. 

1649 mode = "files" 

1650 config_str = self._expiration_config(mode, threshold) 

1651 

1652 cache_manager = self._make_cache_manager(config_str) 

1653 

1654 # Check that an empty cache returns unknown for arbitrary ref 

1655 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1656 

1657 # Should end with datasets: 2, 3, 4 

1658 self.assertExpiration(cache_manager, 5, threshold + 1) 

1659 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1660 

1661 # Check that we will not expire a file that is actively in use. 

1662 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1663 self.assertIsNotNone(found) 

1664 

1665 # Trigger cache expiration that should remove the file 

1666 # we just retrieved. Should now have: 3, 4, 5 

1667 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1668 self.assertIsNotNone(cached) 

1669 

1670 # Cache should still report the standard file count. 

1671 self.assertEqual(cache_manager.file_count, threshold + 1) 

1672 

1673 # Add additional entry to cache. 

1674 # Should now have 4, 5, 6 

1675 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1676 self.assertIsNotNone(cached) 

1677 

1678 # Is the file still there? 

1679 self.assertTrue(found.exists()) 

1680 

1681 # Can we read it? 

1682 data = found.read() 

1683 self.assertGreater(len(data), 0) 

1684 

1685 # Outside context the file should no longer exist. 

1686 self.assertFalse(found.exists()) 

1687 

1688 # File count should not have changed. 

1689 self.assertEqual(cache_manager.file_count, threshold + 1) 

1690 

1691 # Dataset 2 was in the exempt directory, but because hardlinks 

1692 # are used it was deleted from the main cache during the expiry 

1693 # above, and so should no longer be found. 

1694 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1695 self.assertIsNone(found) 

1696 

1697 # And the one stored after it is also gone. 

1698 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1699 self.assertIsNone(found) 

1700 

1701 # But dataset 4 is present. 

1702 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1703 self.assertIsNotNone(found) 

1704 

1705 # Adding a new dataset to the cache should now delete dataset 4. 

1706 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1707 

1708 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1709 self.assertIsNone(found) 

1710 
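# Why the actively used cache file survived expiry above: find_in_cache is
# assumed to hardlink the file into an "in use" location, so removing the
# main cache entry leaves the content readable until the context exits. A
# generic POSIX illustration of that property (not daf_butler code):
@staticmethod
def _hardlink_survival_sketch(tmpdir):
    original = os.path.join(tmpdir, "cached.txt")
    in_use = os.path.join(tmpdir, "in_use.txt")
    with open(original, "w") as f:
        f.write("payload")
    os.link(original, in_use)  # Second name for the same inode.
    os.remove(original)  # "Expire" the main cache entry.
    with open(in_use) as f:  # Content remains readable via the link.
        return f.read()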

1711 def testCacheExpiryDatasets(self): 

1712 threshold = 2 # Keep 2 datasets. 

1713 mode = "datasets" 

1714 config_str = self._expiration_config(mode, threshold) 

1715 

1716 cache_manager = self._make_cache_manager(config_str) 

1717 self.assertExpiration(cache_manager, 5, threshold + 1) 

1718 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1719 

1720 def testCacheExpiryDatasetsComposite(self): 

1721 threshold = 2 # Keep 2 datasets. 

1722 mode = "datasets" 

1723 config_str = self._expiration_config(mode, threshold) 

1724 

1725 cache_manager = self._make_cache_manager(config_str) 

1726 

1727 n_datasets = 3 

1728 for i in range(n_datasets): 

1729 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1730 cached = cache_manager.move_to_cache(component_file, component_ref) 

1731 self.assertIsNotNone(cached) 

1732 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1733 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1734 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1735 

1736 self.assertEqual(cache_manager.file_count, 6) # 2 datasets of 3 files each. 

1737 

1738 # Write two new non-composite datasets and the number of files should drop. 

1739 self.assertExpiration(cache_manager, 2, 5) 

1740 

1741 def testCacheExpirySize(self): 

1742 threshold = 55 # Each file is 10 bytes 

1743 mode = "size" 

1744 config_str = self._expiration_config(mode, threshold) 

1745 

1746 cache_manager = self._make_cache_manager(config_str) 
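# 10 files of 10 bytes each are inserted. Retaining 6 files (60 bytes) is
# consistent with expiry trimming the cache to the 55-byte threshold
# (5 files) before each insertion, with the new file then added on top.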

1747 self.assertExpiration(cache_manager, 10, 6) 

1748 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1749 

1750 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1751 """Insert the datasets and then check the number retained.""" 

1752 for i in range(n_datasets): 

1753 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1754 self.assertIsNotNone(cached) 

1755 

1756 self.assertEqual(cache_manager.file_count, n_retained) 

1757 

1758 # The oldest files should no longer be in the cache. 

1759 for i in range(n_datasets): 

1760 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1761 if i >= n_datasets - n_retained: 

1762 self.assertIsInstance(found, ResourcePath) 

1763 else: 

1764 self.assertIsNone(found) 

1765 

1766 def testCacheExpiryAge(self): 

1767 threshold = 1 # Expire files older than 1 second. 

1768 mode = "age" 

1769 config_str = self._expiration_config(mode, threshold) 

1770 

1771 cache_manager = self._make_cache_manager(config_str) 

1772 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1773 

1774 # Insert 2 files, then sleep, then insert 4 more. 

1775 for i in range(2): 

1776 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1777 self.assertIsNotNone(cached) 

1778 time.sleep(2.0) 

1779 for j in range(4): 

1780 i = 2 + j # Continue the counting 

1781 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1782 self.assertIsNotNone(cached) 

1783 

1784 # Only the files written after the sleep should exist. 

1785 self.assertEqual(cache_manager.file_count, 4) 

1786 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1787 self.assertIsNone(found) 

1788 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1789 self.assertIsInstance(found, ResourcePath) 

1790 

1791 
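# A compact end-to-end sketch of the cache-manager API exercised by the
# tests above; the YAML values are illustrative and every call mirrors one
# used in this file.
def _cache_roundtrip_sketch(universe, uri, ref):
    """Move a file into a fresh cache and read it back (illustrative)."""
    config = Config.fromYaml(
        """
cached:
  root: null
  default: true
  expiry:
    mode: files
    threshold: 10
  cacheable:
    unused: true
"""
    )
    manager = DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=universe)
    if manager.should_be_cached(ref):
        manager.move_to_cache(uri, ref)  # Returns the in-cache URI, or None.
    # find_in_cache is a context manager so the file cannot be expired
    # while the caller is still reading it.
    with manager.find_in_cache(ref, ".txt") as found:
        return found.read() if found is not None else None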

1792class DatasetRefURIsTestCase(unittest.TestCase): 

1793 """Tests for DatasetRefURIs.""" 

1794 

1795 def testSequenceAccess(self): 

1796 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1797 uris = DatasetRefURIs() 

1798 

1799 self.assertEqual(len(uris), 2) 

1800 self.assertEqual(uris[0], None) 

1801 self.assertEqual(uris[1], {}) 

1802 

1803 primaryURI = ResourcePath("1/2/3") 

1804 componentURI = ResourcePath("a/b/c") 

1805 

1806 # Affirm that DatasetRefURIs does not support MutableSequence item assignment. 

1807 with self.assertRaises(TypeError): 

1808 uris[0] = primaryURI 

1809 with self.assertRaises(TypeError): 

1810 uris[1] = {"foo": componentURI} 

1811 

1812 # But the URIs can be set by property name: 

1813 uris.primaryURI = primaryURI 

1814 uris.componentURIs = {"foo": componentURI} 

1815 self.assertEqual(uris.primaryURI, primaryURI) 

1816 self.assertEqual(uris[0], primaryURI) 

1817 

1818 primary, components = uris 

1819 self.assertEqual(primary, primaryURI) 

1820 self.assertEqual(components, {"foo": componentURI}) 

1821 

1822 def testRepr(self): 

1823 """Verify __repr__ output.""" 

1824 uris = DatasetRefURIs(ResourcePath("1/2/3"), {"comp": ResourcePath("a/b/c")}) 

1825 self.assertEqual( 

1826 repr(uris), 

1827 f'DatasetRefURIs(ResourcePath("{os.getcwd()}/1/2/3"), ' 

1828 f"{{'comp': ResourcePath(\"{os.getcwd()}/a/b/c\")}})", 

1829 ) 

1830 

1831 
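# DatasetRefURIs behaves like a read-only two-item tuple, so callers can
# unpack it directly. A minimal sketch using only behavior verified above:
def _unpack_uris_sketch():
    uris = DatasetRefURIs(ResourcePath("a/b/primary"), {"comp": ResourcePath("a/b/component")})
    primary, components = uris  # Tuple-style unpacking.
    assert primary == uris.primaryURI
    assert components == uris.componentURIs
    return primary, components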

1832class DataIdForTestTestCase(unittest.TestCase): 

1833 """Tests for the DataIdForTest class.""" 

1834 

1835 def testImmutable(self): 

1836 """Verify that an instance is immutable by default.""" 

1837 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1838 initial_hash = hash(dataId) 

1839 

1840 with self.assertRaises(RuntimeError): 

1841 dataId["instrument"] = "foo" 

1842 

1843 with self.assertRaises(RuntimeError): 

1844 del dataId["instrument"] 

1845 

1846 assert sys.version_info[0] == 3 

1847 if sys.version_info[1] >= 9: 

1848 with self.assertRaises(RuntimeError): 

1849 dataId |= dict(foo="bar") 

1850 

1851 with self.assertRaises(RuntimeError): 

1852 dataId.pop("instrument") 

1853 

1854 with self.assertRaises(RuntimeError): 

1855 dataId.popitem() 

1856 

1857 with self.assertRaises(RuntimeError): 

1858 dataId.update(dict(instrument="foo")) 

1859 

1860 # Verify that the hash value has not changed. 

1861 self.assertEqual(initial_hash, hash(dataId)) 

1862 

1863 def testMutable(self): 

1864 """Verify that an instance can be made mutable (unfrozen).""" 

1865 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1866 initial_hash = hash(dataId) 

1867 dataId.frozen = False 

1868 self.assertEqual(initial_hash, hash(dataId)) 

1869 

1870 dataId["instrument"] = "foo" 

1871 self.assertEqual(dataId["instrument"], "foo") 

1872 self.assertNotEqual(initial_hash, hash(dataId)) 

1873 initial_hash = hash(dataId) 

1874 

1875 del dataId["instrument"] 

1876 self.assertTrue("instrument" not in dataId) 

1877 self.assertNotEqual(initial_hash, hash(dataId)) 

1878 initial_hash = hash(dataId) 

1879 

1880 assert sys.version_info[0] == 3 

1881 if sys.version_info[1] >= 9: 

1882 dataId |= dict(foo="bar") 

1883 self.assertEqual(dataId["foo"], "bar") 

1884 self.assertNotEqual(initial_hash, hash(dataId)) 

1885 initial_hash = hash(dataId) 

1886 

1887 dataId.pop("visit") 

1888 self.assertTrue("visit" not in dataId) 

1889 self.assertNotEqual(initial_hash, hash(dataId)) 

1890 initial_hash = hash(dataId) 

1891 

1892 dataId.popitem() 

1893 self.assertTrue("physical_filter" not in dataId) 

1894 self.assertNotEqual(initial_hash, hash(dataId)) 

1895 initial_hash = hash(dataId) 

1896 

1897 dataId.update(dict(instrument="foo")) 

1898 self.assertEqual(dataId["instrument"], "foo") 

1899 self.assertNotEqual(initial_hash, hash(dataId)) 

1900 initial_hash = hash(dataId) 

1901 

1902 

1903if __name__ == "__main__": 

1904 unittest.main()