
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used as a DataId in tests.

    By default the class is immutable ("frozen"). The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before Python 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)
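
# A brief usage sketch of the class above (illustrative only; not executed
# by any test in this module):
#
#     data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
#     hash(data_id)           # hashable, so usable wherever a DataId key is needed
#     data_id.frozen = False  # explicitly re-enable mutation
#     data_id["visit"] = 53   # note: the hash changes after mutation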


def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))
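
# A brief usage sketch (illustrative only): plain string keys are wrapped in
# ``Named`` so the mapping can stand in for a real DataCoordinate in the
# datastore APIs exercised below:
#
#     data_id = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 6})
#     hash(data_id)  # hashable once frozen by from_dict()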


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
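
    # Hedged note: concrete subclasses are expected to provide class
    # attributes that the tests below consume, e.g. (names taken from the
    # subclasses later in this file):
    #
    #     configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    #     uriScheme = "file"
    #     ingestTransferModes = (None, "copy", "move", ...)
    #     isEphemeral = False
    #     rootKeys = ("root",)
    #     validationCanFail = True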


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        # Save the last storage class from the loop for reuse below.
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about.
        """
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create the storage class, testing with or without disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so the dataset can now only be
            # found by trusting the caller.
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
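
    # The starred return above lets callers unpack a single ref or many:
    # ``datastore, ref = self.prepDeleteTest()`` or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)``.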

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    using auto mode."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well. The for/else only skips when *every* chained
        # datastore is in-memory.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, including a non-existent
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))
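
    # Hedged note on the helper above: it assumes paired test storage classes
    # named "<root>A" and "<root>B" (e.g. "MetricsExampleA" and
    # "MetricsExampleB"), presumably defined in
    # config/basic/storageClasses.yaml; it puts the same object under both
    # and checks that the two round-trips agree.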

1016 

1017 

1018class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase): 

1019 """PosixDatastore specialization""" 

1020 

1021 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1022 uriScheme = "file" 

1023 canIngestNoTransferAuto = True 

1024 ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto") 

1025 isEphemeral = False 

1026 rootKeys = ("root",) 

1027 validationCanFail = True 

1028 

1029 def setUp(self): 

1030 # Override the working directory before calling the base class 

1031 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1032 super().setUp() 

1033 

1034 def testAtomicWrite(self): 

1035 """Test that we write to a temporary and then rename""" 

1036 datastore = self.makeDatastore() 

1037 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1038 dimensions = self.universe.extract(("visit", "physical_filter")) 

1039 metrics = makeExampleMetrics() 

1040 

1041 dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"}) 

1042 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

1043 

1044 with self.assertLogs("lsst.resources", "DEBUG") as cm: 

1045 datastore.put(metrics, ref) 

1046 move_logs = [ll for ll in cm.output if "transfer=" in ll] 

1047 self.assertIn("transfer=move", move_logs[0]) 

1048 

1049 # And the transfer should be file to file. 

1050 self.assertEqual(move_logs[0].count("file://"), 2) 

1051 

1052 def testCanNotDeterminePutFormatterLocation(self): 

1053 """Verify that the expected exception is raised if the FileDatastore 

1054 can not determine the put formatter location.""" 

1055 

1056 _ = makeExampleMetrics() 

1057 datastore = self.makeDatastore() 

1058 

1059 # Create multiple storage classes for testing different formulations 

1060 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1061 

1062 sccomp = StorageClass("Dummy") 

1063 compositeStorageClass = StorageClass( 

1064 "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp} 

1065 ) 

1066 

1067 dimensions = self.universe.extract(("visit", "physical_filter")) 

1068 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1069 

1070 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

1071 compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False) 

1072 

1073 def raiser(ref): 

1074 raise DatasetTypeNotSupportedError() 

1075 

1076 with unittest.mock.patch.object( 

1077 lsst.daf.butler.datastores.fileDatastore.FileDatastore, 

1078 "_determine_put_formatter_location", 

1079 side_effect=raiser, 

1080 ): 

1081 # verify the non-composite ref execution path: 

1082 with self.assertRaises(DatasetTypeNotSupportedError): 

1083 datastore.getURIs(ref, predict=True) 

1084 

1085 # verify the composite-ref execution path: 

1086 with self.assertRaises(DatasetTypeNotSupportedError): 

1087 datastore.getURIs(compRef, predict=True) 

1088 

1089 

1090class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase): 

1091 """Posix datastore tests but with checksums disabled.""" 

1092 

1093 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml") 

1094 

1095 def testChecksum(self): 

1096 """Ensure that checksums have not been calculated.""" 

1097 

1098 datastore = self.makeDatastore() 

1099 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1100 dimensions = self.universe.extract(("visit", "physical_filter")) 

1101 metrics = makeExampleMetrics() 

1102 

1103 dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"}) 

1104 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

1105 

1106 # Configuration should have disabled checksum calculation 

1107 datastore.put(metrics, ref) 

1108 infos = datastore.getStoredItemsInfo(ref) 

1109 self.assertIsNone(infos[0].checksum) 

1110 

1111 # Remove put back but with checksums enabled explicitly 

1112 datastore.remove(ref) 

1113 datastore.useChecksum = True 

1114 datastore.put(metrics, ref) 

1115 

1116 infos = datastore.getStoredItemsInfo(ref) 

1117 self.assertIsNotNone(infos[0].checksum) 

1118 

1119 

1120class TrashDatastoreTestCase(PosixDatastoreTestCase): 

1121 """Restrict trash test to FileDatastore.""" 

1122 

1123 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1124 

1125 def testTrash(self): 

1126 datastore, *refs = self.prepDeleteTest(n_refs=10) 

1127 

1128 # Trash one of them. 

1129 ref = refs.pop() 

1130 uri = datastore.getURI(ref) 

1131 datastore.trash(ref) 

1132 self.assertTrue(uri.exists(), uri) # Not deleted yet 

1133 datastore.emptyTrash() 

1134 self.assertFalse(uri.exists(), uri) 

1135 

1136 # Trash it again should be fine. 

1137 datastore.trash(ref) 

1138 

1139 # Trash multiple items at once. 

1140 subset = [refs.pop(), refs.pop()] 

1141 datastore.trash(subset) 

1142 datastore.emptyTrash() 

1143 

1144 # Remove a record and trash should do nothing. 

1145 # This is execution butler scenario. 

1146 ref = refs.pop() 

1147 uri = datastore.getURI(ref) 

1148 datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

1149 self.assertTrue(uri.exists()) 

1150 datastore.trash(ref) 

1151 datastore.emptyTrash() 

1152 self.assertTrue(uri.exists()) 

1153 

1154 # Switch on trust and it should delete the file. 

1155 datastore.trustGetRequest = True 

1156 datastore.trash([ref]) 

1157 self.assertFalse(uri.exists()) 

1158 

1159 # Remove multiples at once in trust mode. 

1160 subset = [refs.pop() for i in range(3)] 

1161 datastore.trash(subset) 

1162 datastore.trash(refs.pop()) # Check that a single ref can trash 

1163 

1164 

1165class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase): 

1166 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1167 

1168 def setUp(self): 

1169 # Override the working directory before calling the base class 

1170 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1171 super().setUp() 

1172 

1173 def testCleanup(self): 

1174 """Test that a failed formatter write does cleanup a partial file.""" 

1175 metrics = makeExampleMetrics() 

1176 datastore = self.makeDatastore() 

1177 

1178 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1179 

1180 dimensions = self.universe.extract(("visit", "physical_filter")) 

1181 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1182 

1183 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False) 

1184 

1185 # Determine where the file will end up (we assume Formatters use 

1186 # the same file extension) 

1187 expectedUri = datastore.getURI(ref, predict=True) 

1188 self.assertEqual(expectedUri.fragment, "predicted") 

1189 

1190 self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}") 

1191 

1192 # Try formatter that fails and formatter that fails and leaves 

1193 # a file behind 

1194 for formatter in (BadWriteFormatter, BadNoWriteFormatter): 

1195 with self.subTest(formatter=formatter): 

1196 # Monkey patch the formatter 

1197 datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True) 

1198 

1199 # Try to put the dataset, it should fail 

1200 with self.assertRaises(Exception): 

1201 datastore.put(metrics, ref) 

1202 

1203 # Check that there is no file on disk 

1204 self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}") 

1205 

1206 # Check that there is a directory 

1207 dir = expectedUri.dirname() 

1208 self.assertTrue(dir.exists(), f"Check for existence of directory {dir}") 

1209 

1210 # Force YamlFormatter and check that this time a file is written 

1211 datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True) 

1212 datastore.put(metrics, ref) 

1213 self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}") 

1214 datastore.remove(ref) 

1215 self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}") 

1216 

1217 

1218class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase): 

1219 """PosixDatastore specialization""" 

1220 

1221 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml") 

1222 uriScheme = "mem" 

1223 hasUnsupportedPut = False 

1224 ingestTransferModes = () 

1225 isEphemeral = True 

1226 rootKeys = None 

1227 validationCanFail = False 

1228 

1229 

1230class ChainedDatastoreTestCase(PosixDatastoreTestCase): 

1231 """ChainedDatastore specialization using a POSIXDatastore""" 

1232 

1233 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml") 

1234 hasUnsupportedPut = False 

1235 canIngestNoTransferAuto = False 

1236 ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto") 

1237 isEphemeral = False 

1238 rootKeys = (".datastores.1.root", ".datastores.2.root") 

1239 validationCanFail = True 

1240 

1241 

1242class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase): 

1243 """ChainedDatastore specialization using all InMemoryDatastore""" 

1244 

1245 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml") 

1246 validationCanFail = False 

1247 

1248 

1249class DatastoreConstraintsTests(DatastoreTestsBase): 

1250 """Basic tests of constraints model of Datastores.""" 

1251 

1252 def testConstraints(self): 

1253 """Test constraints model. Assumes that each test class has the 

1254 same constraints.""" 

1255 metrics = makeExampleMetrics() 

1256 datastore = self.makeDatastore() 

1257 

1258 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1259 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1260 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1261 dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}) 

1262 

1263 # Write empty file suitable for ingest check (JSON and YAML variants) 

1264 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1265 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1266 for datasetTypeName, sc, accepted in ( 

1267 ("metric", sc1, True), 

1268 ("metric5", sc1, False), 

1269 ("metric33", sc1, True), 

1270 ("metric5", sc2, True), 

1271 ): 

1272 # Choose different temp file depending on StorageClass 

1273 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1274 

1275 with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name): 

1276 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False) 

1277 if accepted: 

1278 datastore.put(metrics, ref) 

1279 self.assertTrue(datastore.exists(ref)) 

1280 datastore.remove(ref) 

1281 

1282 # Try ingest 

1283 if self.canIngest: 

1284 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1285 self.assertTrue(datastore.exists(ref)) 

1286 datastore.remove(ref) 

1287 else: 

1288 with self.assertRaises(DatasetTypeNotSupportedError): 

1289 datastore.put(metrics, ref) 

1290 self.assertFalse(datastore.exists(ref)) 

1291 

1292 # Again with ingest 

1293 if self.canIngest: 

1294 with self.assertRaises(DatasetTypeNotSupportedError): 

1295 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1296 self.assertFalse(datastore.exists(ref)) 

1297 

1298 

1299class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1300 """PosixDatastore specialization""" 

1301 

1302 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml") 

1303 canIngest = True 

1304 

1305 def setUp(self): 

1306 # Override the working directory before calling the base class 

1307 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1308 super().setUp() 

1309 

1310 

1311class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1312 """InMemoryDatastore specialization""" 

1313 

1314 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml") 

1315 canIngest = False 

1316 

1317 

1318class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase): 

1319 """ChainedDatastore specialization using a POSIXDatastore and constraints 

1320 at the ChainedDatstore""" 

1321 

1322 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1323 

1324 

1325class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1326 """ChainedDatastore specialization using a POSIXDatastore""" 

1327 

1328 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1329 

1330 

1331class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1332 """ChainedDatastore specialization using all InMemoryDatastore""" 

1333 

1334 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1335 canIngest = False 

1336 

1337 

1338class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1339 """Test that a chained datastore can control constraints per-datastore 

1340 even if child datastore would accept.""" 

1341 

1342 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1343 

1344 def setUp(self): 

1345 # Override the working directory before calling the base class 

1346 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1347 super().setUp() 

1348 

1349 def testConstraints(self): 

1350 """Test chained datastore constraints model.""" 

1351 metrics = makeExampleMetrics() 

1352 datastore = self.makeDatastore() 

1353 

1354 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1355 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1356 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1357 dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"} 

1358 dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"} 

1359 

1360 # Write empty file suitable for ingest check (JSON and YAML variants) 

1361 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1362 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1363 

1364 for typeName, dataId, sc, accept, ingest in ( 

1365 ("metric", dataId1, sc1, (False, True, False), True), 

1366 ("metric5", dataId1, sc1, (False, False, False), False), 

1367 ("metric5", dataId2, sc1, (True, False, False), False), 

1368 ("metric33", dataId2, sc2, (True, True, False), True), 

1369 ("metric5", dataId1, sc2, (False, True, False), True), 

1370 ): 

1371 # Choose different temp file depending on StorageClass 

1372 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1373 

1374 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1375 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False) 

1376 if any(accept): 

1377 datastore.put(metrics, ref) 

1378 self.assertTrue(datastore.exists(ref)) 

1379 

1380 # Check each datastore inside the chained datastore 

1381 for childDatastore, expected in zip(datastore.datastores, accept): 

1382 self.assertEqual( 

1383 childDatastore.exists(ref), 

1384 expected, 

1385 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1386 ) 

1387 

1388 datastore.remove(ref) 

1389 

1390 # Check that ingest works 

1391 if ingest: 

1392 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1393 self.assertTrue(datastore.exists(ref)) 

1394 

1395 # Check each datastore inside the chained datastore 

1396 for childDatastore, expected in zip(datastore.datastores, accept): 

1397 # Ephemeral datastores means InMemory at the moment 

1398 # and that does not accept ingest of files. 

1399 if childDatastore.isEphemeral: 

1400 expected = False 

1401 self.assertEqual( 

1402 childDatastore.exists(ref), 

1403 expected, 

1404 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1405 ) 

1406 

1407 datastore.remove(ref) 

1408 else: 

1409 with self.assertRaises(DatasetTypeNotSupportedError): 

1410 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1411 

1412 else: 

1413 with self.assertRaises(DatasetTypeNotSupportedError): 

1414 datastore.put(metrics, ref) 

1415 self.assertFalse(datastore.exists(ref)) 

1416 

1417 # Again with ingest 

1418 with self.assertRaises(DatasetTypeNotSupportedError): 

1419 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1420 self.assertFalse(datastore.exists(ref)) 

1421 

1422 

1423class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1424 """Tests for datastore caching infrastructure.""" 

1425 

1426 @classmethod 

1427 def setUpClass(cls): 

1428 cls.storageClassFactory = StorageClassFactory() 

1429 cls.universe = DimensionUniverse() 

1430 

1431 # Ensure that we load the test storage class definitions. 

1432 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1433 cls.storageClassFactory.addFromConfig(scConfigFile) 

1434 

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
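
    # A sketch of the cache configuration layout exercised by the tests below,
    # inferred from the YAML snippets in this file rather than from the config
    # schema itself: a top-level "cached" section with an optional "root"
    # (null means create a directory on demand), a "default" caching flag,
    # per-dataset-type overrides under "cacheable", and an optional "expiry"
    # section with "mode" and "threshold".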

    def testNoCacheDir(self):
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)
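
    # Note on the test above: with "root: null" the manager defers creating a
    # cache directory until one is first needed and then marks it temporary,
    # so it can presumably be cleaned up automatically afterwards.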

    def testNoCacheDirReversed(self):
        """Use the default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no explicit cache root so that a fallback
        # directory can take effect.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")
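
    # The loop above suggests the disabled cache manager implements the full
    # caching interface as no-ops: nothing is cached, moved, known, or found.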

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """
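
    # The expiry tests below exercise four threshold modes: "files" (maximum
    # number of cached files), "datasets" (maximum number of cached datasets),
    # "size" (maximum total size in bytes), and "age" (maximum age in
    # seconds). These meanings are inferred from the assertions in the tests
    # rather than taken from the cache manager's documentation.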

    def testCacheExpiryFiles(self):
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)
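
    # From the comments and assertions above: find_in_cache() appears to
    # protect an actively used file by exposing a hardlinked copy in an
    # "exempt" location for the duration of the context manager, so the file
    # remains readable even after its main cache entry has been expired.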

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files should
        # drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)
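
    # Note: the count-based tests above assert n_retained == threshold + 1.
    # This appears to be because expiry runs as part of move_to_cache(),
    # trimming the cache down to the threshold before the new entry is added.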

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep past the threshold, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self):
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # affirm that DatasetRefURIs does not support MutableSequence functions
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # but DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})
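
    # The tuple-style access verified above suggests that DatasetRefURIs can
    # stand in for code that previously received a plain
    # (primaryURI, componentURIs) tuple; that reading is an inference from
    # this test rather than a documented guarantee.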

    def testRepr(self):
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class DataIdForTestTestCase(unittest.TestCase):
    """Tests for the DataIdForTest class."""

    def testImmutable(self):
        """Verify that an instance is immutable by default."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)

        with self.assertRaises(RuntimeError):
            dataId["instrument"] = "foo"

        with self.assertRaises(RuntimeError):
            del dataId["instrument"]

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            with self.assertRaises(RuntimeError):
                dataId |= dict(foo="bar")

        with self.assertRaises(RuntimeError):
            dataId.pop("instrument")

        with self.assertRaises(RuntimeError):
            dataId.popitem()

        with self.assertRaises(RuntimeError):
            dataId.update(dict(instrument="foo"))

        # verify that the hash value has not changed.
        self.assertEqual(initial_hash, hash(dataId))

    def testMutable(self):
        """Verify that an instance can be made mutable (unfrozen)."""
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        initial_hash = hash(dataId)
        dataId.frozen = False
        self.assertEqual(initial_hash, hash(dataId))

        dataId["instrument"] = "foo"
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        del dataId["instrument"]
        self.assertTrue("instrument" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        assert sys.version_info[0] == 3
        if sys.version_info[1] >= 9:
            dataId |= dict(foo="bar")
            self.assertEqual(dataId["foo"], "bar")
            self.assertNotEqual(initial_hash, hash(dataId))
            initial_hash = hash(dataId)

        dataId.pop("visit")
        self.assertTrue("visit" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.popitem()
        self.assertTrue("physical_filter" not in dataId)
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)

        dataId.update(dict(instrument="foo"))
        self.assertEqual(dataId["instrument"], "foo")
        self.assertNotEqual(initial_hash, hash(dataId))
        initial_hash = hash(dataId)


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False)

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)
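
        # The rebase() calls below should attach the same stored-file record
        # to a different ref; rebasing back onto the original ref should
        # round-trip to the original info, which is what the assertions check.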

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False)
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)


if __name__ == "__main__":
    unittest.main()